Updating Kokkos lib
This commit is contained in:
@ -1,5 +1,27 @@
|
||||
# Change Log
|
||||
|
||||
## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Containers: Adding block partitioning to StaticCrsGraph [\#625](https://github.com/kokkos/kokkos/issues/625)
|
||||
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
|
||||
- OpenMP: error out if KOKKOS\_HAVE\_OPENMP is defined but not \_OPENMP [\#605](https://github.com/kokkos/kokkos/issues/605)
|
||||
- CMake: fix standalone build with tests [\#604](https://github.com/kokkos/kokkos/issues/604)
|
||||
- Change README \(that GitHub shows when opening Kokkos project page\) to tell users how to submit PRs [\#597](https://github.com/kokkos/kokkos/issues/597)
|
||||
- Add correctness testing for all operators of Atomic View [\#420](https://github.com/kokkos/kokkos/issues/420)
|
||||
- Allow assignment of Views with compatible memory spaces [\#290](https://github.com/kokkos/kokkos/issues/290)
|
||||
- Build only one version of Kokkos library for tests [\#213](https://github.com/kokkos/kokkos/issues/213)
|
||||
- Clean out old KOKKOS\_HAVE\_CXX11 macros clauses [\#156](https://github.com/kokkos/kokkos/issues/156)
|
||||
- Harmonize Macro names [\#150](https://github.com/kokkos/kokkos/issues/150)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Cray and PGI: Kokkos\_Parallel\_Reduce [\#634](https://github.com/kokkos/kokkos/issues/634)
|
||||
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
|
||||
- Normal\(\) function random number generator doesn't give the expected distribution [\#592](https://github.com/kokkos/kokkos/issues/592)
|
||||
|
||||
## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
|
||||
IF(COMMAND TRIBITS_PACKAGE_DECL)
|
||||
SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "")
|
||||
ELSE()
|
||||
@ -8,6 +7,7 @@ ENDIF()
|
||||
IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR)
|
||||
INCLUDE(cmake/tribits.cmake)
|
||||
SET(CMAKE_CXX_STANDARD 11)
|
||||
ENDIF()
|
||||
|
||||
#
|
||||
|
||||
@ -7,6 +7,8 @@
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Kokkos is licensed under 3-clause BSD terms of use:
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
|
||||
@ -5,9 +5,10 @@ KOKKOS_PATH=../../lib/kokkos
|
||||
CXXFLAGS=$(CCFLAGS)
|
||||
|
||||
#Options: OpenMP,Serial,Pthreads,Cuda
|
||||
# Default backend; `?=` only assigns when KOKKOS_DEVICES is not already set.
# (A stray "<<<<<<< HEAD" merge-conflict marker that preceded this line was
# removed: it is not valid Makefile syntax.)
KOKKOS_DEVICES ?= "OpenMP"
|
||||
#KOKKOS_DEVICES ?= "Pthreads"
|
||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
|
||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
|
||||
KOKKOS_ARCH ?= ""
|
||||
#Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
@ -78,12 +79,6 @@ KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PG
|
||||
KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
ifneq ($(OMPI_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
endif
|
||||
ifneq ($(MPICH_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
endif
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
|
||||
@ -192,7 +187,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) |
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
|
||||
#AMD based
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
||||
@ -206,7 +202,7 @@ KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_US
|
||||
# Decide what ISA level we are able to support
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
|
||||
|
||||
#Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
|
||||
@ -453,6 +449,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler, or it could be XL on P9
|
||||
KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
|
||||
KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
|
||||
@ -5,6 +5,9 @@ Kokkos is designed to target complex node architectures with N-level memory
|
||||
hierarchies and multiple types of execution resources. It currently can use
|
||||
OpenMP, Pthreads and CUDA as backend programming models.
|
||||
|
||||
Kokkos is licensed under standard 3-clause BSD terms of use. For specifics
|
||||
see the LICENSE file contained in the repository or distribution.
|
||||
|
||||
The core developers of Kokkos are Carter Edwards and Christian Trott
|
||||
at the Computer Science Research Institute of the Sandia National
|
||||
Laboratories.
|
||||
@ -152,3 +155,11 @@ multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
|
||||
This will enforce proper UVM allocations, but can lead to errors if
|
||||
more than a single GPU is used by a single process.
|
||||
|
||||
===========================================================================
|
||||
====Contributing===========================================================
|
||||
===========================================================================
|
||||
|
||||
Contributions to Kokkos are welcome. To contribute, please open an issue
|
||||
where a feature request or bug can be discussed. Then issue a pull request
|
||||
with your contribution. Pull requests must be issued against the develop branch.
|
||||
|
||||
|
||||
@ -1014,7 +1014,7 @@ namespace Kokkos {
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__)
|
||||
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
|
||||
|
||||
template<>
|
||||
class Random_XorShift1024<Kokkos::Cuda> {
|
||||
|
||||
@ -49,7 +49,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <TestRandom.hpp>
|
||||
#include <TestSort.hpp>
|
||||
@ -106,5 +106,5 @@ CUDA_SORT_UNSIGNED(171)
|
||||
#undef CUDA_SORT_UNSIGNED
|
||||
}
|
||||
|
||||
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
||||
#endif /* #ifdef KOKKOS_ENABLE_CUDA */
|
||||
|
||||
|
||||
@ -52,7 +52,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
class openmp : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
|
||||
@ -55,7 +55,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_SERIAL
|
||||
#ifdef KOKKOS_ENABLE_SERIAL
|
||||
class serial : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
@ -93,7 +93,7 @@ SERIAL_SORT_UNSIGNED(171)
|
||||
#undef SERIAL_RANDOM_XORSHIFT1024
|
||||
#undef SERIAL_SORT_UNSIGNED
|
||||
|
||||
#endif // KOKKOS_HAVE_SERIAL
|
||||
#endif // KOKKOS_ENABLE_SERIAL
|
||||
} // namespace Test
|
||||
|
||||
|
||||
|
||||
@ -55,7 +55,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_PTHREAD
|
||||
#ifdef KOKKOS_ENABLE_PTHREAD
|
||||
class threads : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
|
||||
@ -4,13 +4,31 @@ INCLUDE(CTest)
|
||||
cmake_policy(SET CMP0054 NEW)
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME})
|
||||
project(Kokkos)
|
||||
project(KokkosCMake)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${${PROJECT_NAME}_ENABLE_DEBUG}})
|
||||
MESSAGE(WARNING "The project name is: ${PROJECT_NAME}")
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
|
||||
SET(${PROJECT_NAME}_ENABLE_OpenMP OFF)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG)
|
||||
SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11)
|
||||
SET(${PROJECT_NAME}_ENABLE_CXX11 ON)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS)
|
||||
SET(${PROJECT_NAME}_ENABLE_TESTS OFF)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED TPL_ENABLE_Pthread)
|
||||
SET(TPL_ENABLE_Pthread OFF)
|
||||
ENDIF()
|
||||
|
||||
FUNCTION(ASSERT_DEFINED VARS)
|
||||
FOREACH(VAR ${VARS})
|
||||
IF(NOT DEFINED ${VAR})
|
||||
@ -70,9 +88,11 @@ ENDMACRO()
|
||||
|
||||
|
||||
# TRIBITS_ADD_TEST_DIRECTORIES(<dir>...)
# Calls ADD_SUBDIRECTORY on every directory passed in ARGN, but only when the
# value of ${PROJECT_NAME}_ENABLE_TESTS evaluates to true; otherwise the macro
# does nothing.
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
|
||||
IF(${${PROJECT_NAME}_ENABLE_TESTS})
|
||||
FOREACH(TEST_DIR ${ARGN})
|
||||
ADD_SUBDIRECTORY(${TEST_DIR})
|
||||
ENDFOREACH()
|
||||
ENDIF()
|
||||
ENDMACRO()
|
||||
|
||||
MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
|
||||
@ -264,11 +284,11 @@ FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME)
|
||||
SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME})
|
||||
ENDIF()
|
||||
|
||||
IF (PARSE_TESTONLY)
|
||||
SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
|
||||
ELSE()
|
||||
SET(EXCLUDE_FROM_ALL_KEYWORD)
|
||||
ENDIF()
|
||||
# IF (PARSE_TESTONLY)
|
||||
# SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
|
||||
# ELSE()
|
||||
# SET(EXCLUDE_FROM_ALL_KEYWORD)
|
||||
# ENDIF()
|
||||
ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES})
|
||||
|
||||
TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS})
|
||||
@ -470,9 +490,8 @@ ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS)
|
||||
|
||||
MACRO(TRIBITS_PACKAGE_DECL NAME)
|
||||
|
||||
PROJECT(${NAME})
|
||||
STRING(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC)
|
||||
SET(PACKAGE_NAME ${PROJECT_NAME})
|
||||
SET(PACKAGE_NAME ${NAME})
|
||||
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
|
||||
|
||||
SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
|
||||
@ -489,7 +508,7 @@ MACRO(TRIBITS_PROCESS_SUBPACKAGES)
|
||||
FOREACH(SUBPACKAGE ${SUBPACKAGES})
|
||||
GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY)
|
||||
GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY)
|
||||
ADD_SUBDIRECTORY(${SUBPACKAGE_DIR})
|
||||
ADD_SUBDIRECTORY(${CMAKE_BINARY_DIR}/../${SUBPACKAGE_DIR})
|
||||
ENDFOREACH()
|
||||
ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES)
|
||||
|
||||
|
||||
@ -4,3 +4,4 @@ tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e
|
||||
tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e
|
||||
tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304
|
||||
tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966
|
||||
tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6
|
||||
|
||||
@ -10,12 +10,18 @@ set -o pipefail
|
||||
|
||||
MACHINE=""
|
||||
HOSTNAME=$(hostname)
|
||||
PROCESSOR=`uname -p`
|
||||
|
||||
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
|
||||
MACHINE=white
|
||||
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
MACHINE=bowman
|
||||
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
if [[ "$PROCESSOR" = "aarch64" ]]; then
|
||||
MACHINE=sullivan
|
||||
else
|
||||
MACHINE=shepard
|
||||
fi
|
||||
elif [[ "$HOSTNAME" =~ apollo ]]; then
|
||||
MACHINE=apollo
|
||||
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
|
||||
@ -27,6 +33,7 @@ fi
|
||||
|
||||
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
|
||||
@ -200,6 +207,23 @@ elif [ "$MACHINE" = "bowman" ]; then
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=KNL"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
|
||||
elif [ "$MACHINE" = "sullivan" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=96
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS")
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=ARMv8-ThunderX"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
|
||||
elif [ "$MACHINE" = "shepard" ]; then
|
||||
@ -298,6 +322,7 @@ echo "--debug: Run tests in debug. Defaults to False"
|
||||
echo "--test-script: Test this script, not Kokkos"
|
||||
echo "--skip-hwloc: Do not do hwloc tests"
|
||||
echo "--num=N: Number of jobs to run in parallel"
|
||||
echo "--spot-check: Minimal test set to issue pull request"
|
||||
echo "--dry-run: Just print what would be executed"
|
||||
echo "--build-only: Just do builds, don't run anything"
|
||||
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
|
||||
|
||||
66
lib/kokkos/config/test_kokkos_master_develop_promotion.sh
Executable file
66
lib/kokkos/config/test_kokkos_master_develop_promotion.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/bin/bash
#
# Configure one Kokkos build in a fresh, timestamped directory and run
# `make test` in it. The script exits with the status of `make test`.
#
# Positional arguments:
#   $1    build-dir prefix; "-<YYYYmmddHHMMSS>" is appended
#   $2    backend, forwarded as --with-devices
#   $3    environment module(s) to load
#   $4    compiler, forwarded as --compiler
#   $5    cxxflags, forwarded as --cxxflags
#   $6    architecture, forwarded as --arch
#   $7    "debug" to add --debug; any other value builds without it
#   $8    kokkos options, forwarded as --with-options
#   $9    kokkos cuda options, forwarded as --with-cuda-options
#   ${10} "yes" to add --with-hwloc=${HWLOC_ROOT}
#         (falls back to $9 when omitted, because earlier versions of this
#         script tested $9 for the hwloc switch)

. /etc/profile.d/modules.sh

build_dir_prefix="$1"
backend="$2"
module_list="$3"
compiler="$4"
cxxflags_arg="$5"
architecture="$6"
debug_mode="$7"
kokkos_options="$8"
kokkos_cuda_options="$9"
# Positional parameters above 9 need braces: "$10" would expand to "${1}0".
use_hwloc="${10:-$9}"

echo "build-dir $build_dir_prefix"
echo "backend $backend"
echo "module $module_list"
echo "compiler $compiler"
echo "cxxflags $cxxflags_arg"
echo "architecture $architecture"
echo "debug $debug_mode"
echo "kokkos-options $kokkos_options"
echo "kokkos-cuda-options $kokkos_cuda_options"
echo "hwloc $use_hwloc"

NOW=$(date "+%Y%m%d%H%M%S")
BASEDIR="${build_dir_prefix}-${NOW}"

# Stop early rather than configuring in the wrong directory.
mkdir "$BASEDIR" || exit 1
cd "$BASEDIR" || exit 1

# Left unquoted on purpose so that a space-separated list is passed to
# `module load` as several module names.
module load $module_list

# Options common to every configuration; each value stays a single argument
# even when it contains spaces (e.g. several cxxflags).
generate_args=(
  "--with-devices=${backend}"
  "--compiler=${compiler}"
  "--cxxflags=${cxxflags_arg}"
  "--arch=${architecture}"
  "--with-options=${kokkos_options}"
  "--with-cuda-options=${kokkos_cuda_options}"
)

# Only request a debug build when the caller asked for one.
if [ "$debug_mode" == "debug" ]; then
  generate_args+=("--debug")
fi

if [ "$use_hwloc" == "yes" ]; then
  generate_args+=("--with-hwloc=${HWLOC_ROOT}")
fi

# generate_makefile.sh is expected one level above the new build directory.
../generate_makefile.sh "${generate_args[@]}"

make test
# `return` is only valid inside a function or a sourced file; use `exit` so
# the caller receives the status of `make test`.
exit $?
|
||||
@ -27,13 +27,13 @@ cd ${TRILINOS_UPDATED_PATH}
|
||||
echo ""
|
||||
echo ""
|
||||
echo "Trilinos State:"
|
||||
git log --pretty=oneline --since=2.days
|
||||
SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'`
|
||||
git log --pretty=oneline --since=7.days
|
||||
SHA=`git log --pretty=oneline --since=7.days | head -n 2 | tail -n 1 | awk '{print $1}'`
|
||||
cd ..
|
||||
|
||||
cd ${TRILINOS_PRISTINE_PATH}
|
||||
git status
|
||||
git log --pretty=oneline --since=2.days
|
||||
git log --pretty=oneline --since=7.days
|
||||
echo "Checkout develop"
|
||||
git checkout develop
|
||||
echo "Pull"
|
||||
@ -46,5 +46,5 @@ cd ${TRILINOS_PRISTINE_PATH}
|
||||
echo ""
|
||||
echo ""
|
||||
echo "Trilinos Pristine State:"
|
||||
git log --pretty=oneline --since=2.days
|
||||
git log --pretty=oneline --since=7.days
|
||||
cd ..
|
||||
|
||||
@ -52,7 +52,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <TestDynRankView.hpp>
|
||||
|
||||
@ -106,4 +106,4 @@ TEST_F( cuda, unordered_map_performance_far)
|
||||
|
||||
}
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
|
||||
@ -164,12 +164,10 @@ struct UnorderedMapTest
|
||||
|
||||
};
|
||||
|
||||
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
|
||||
|
||||
template <typename Device, bool Near>
|
||||
void run_performance_tests(std::string const & base_file_name)
|
||||
{
|
||||
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
|
||||
#if 0
|
||||
std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
|
||||
std::string length_file_name = base_file_name + std::string("-length.csv");
|
||||
std::string distance_file_name = base_file_name + std::string("-distance.csv");
|
||||
|
||||
@ -586,13 +586,13 @@ private:
|
||||
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||
|
||||
// rank of the calling operator - included as first argument in ARG
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
|
||||
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ;
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
#endif
|
||||
@ -609,9 +609,9 @@ public:
|
||||
reference_type operator()() const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
|
||||
#endif
|
||||
return implementation_map().reference();
|
||||
//return m_map.reference(0,0,0,0,0,0,0);
|
||||
@ -650,9 +650,9 @@ public:
|
||||
operator()(const iType & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
return m_map.reference(i0);
|
||||
}
|
||||
@ -663,9 +663,9 @@ public:
|
||||
operator()(const iType & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
return m_map.reference(i0,0,0,0,0,0,0);
|
||||
}
|
||||
@ -677,9 +677,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1);
|
||||
}
|
||||
@ -690,9 +690,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,0,0,0,0,0);
|
||||
}
|
||||
@ -704,9 +704,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2);
|
||||
}
|
||||
@ -717,9 +717,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,0,0,0,0);
|
||||
}
|
||||
@ -731,9 +731,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3);
|
||||
}
|
||||
@ -744,9 +744,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,0,0,0);
|
||||
}
|
||||
@ -758,9 +758,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4);
|
||||
}
|
||||
@ -771,9 +771,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,0,0);
|
||||
}
|
||||
@ -785,9 +785,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5);
|
||||
}
|
||||
@ -798,9 +798,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
|
||||
}
|
||||
@ -812,14 +812,14 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
}
|
||||
|
||||
#undef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
|
||||
//----------------------------------------
|
||||
// Standard constructor, destructor, and assignment operators...
|
||||
@ -960,7 +960,7 @@ public:
|
||||
alloc_prop prop( arg_prop );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// If allocating in CudaUVMSpace must fence before and after
|
||||
// the allocation to protect against possible concurrent access
|
||||
// on the CPU and the GPU.
|
||||
@ -976,7 +976,7 @@ public:
|
||||
record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
|
||||
traits::device_type::memory_space::execution_space::fence();
|
||||
}
|
||||
|
||||
@ -51,6 +51,80 @@
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace Impl {
|
||||
// Functor that fills row_block_offsets with row indices that split the rows
// described by row_offsets into num_blocks consecutive blocks. The cost of a
// row is its number of entries plus a fixed cost_per_row; blocks are sized
// against the average cost per block (total cost / num_blocks).
// operator() handles a single row index; create_block_partitioning launches
// it with Kokkos::parallel_for over [0, numRows()).
// NOTE(review): row_block_offsets(0) is never written by the visible code;
// presumably it relies on the view's initial contents -- confirm.
template<class RowOffsetsType, class RowBlockOffsetsType>
|
||||
struct StaticCrsGraphBalancerFunctor {
|
||||
// Integer type used for all row/entry/block arithmetic below.
typedef typename RowOffsetsType::non_const_value_type int_type;
|
||||
// Input: row offsets; read at indices iRow, iRow+1 and num_rows.
RowOffsetsType row_offsets;
|
||||
// Output: block boundaries; indices up to and including num_blocks are written.
RowBlockOffsetsType row_block_offsets;
|
||||
|
||||
// Fixed cost added per row, and the number of blocks to create.
int_type cost_per_row, num_blocks;
|
||||
|
||||
// Stores the two views and the two scalar parameters; no other work is done.
StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_,
|
||||
RowBlockOffsetsType row_block_offsets_,
|
||||
int_type cost_per_row_, int_type num_blocks_):
|
||||
row_offsets(row_offsets_),
|
||||
row_block_offsets(row_block_offsets_),
|
||||
cost_per_row(cost_per_row_),
|
||||
num_blocks(num_blocks_){}
|
||||
|
||||
// Decide, for row iRow, whether a block boundary falls on this row and, if
// so, write the boundary row index into row_block_offsets.
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int_type& iRow) const {
|
||||
// row_offsets has one more element than there are rows.
const int_type num_rows = row_offsets.dimension_0()-1;
|
||||
// The last offset is used as the total number of entries.
const int_type num_entries = row_offsets(num_rows);
|
||||
const int_type total_cost = num_entries + num_rows*cost_per_row;
|
||||
|
||||
// Average cost per block, kept as a double so the quotients below are
// computed in floating point before being truncated to int_type.
const double cost_per_workset = 1.0*total_cost/num_blocks;
|
||||
|
||||
// Cost of this row alone: its entries plus the fixed per-row cost.
const int_type row_cost = row_offsets(iRow+1)-row_offsets(iRow) + cost_per_row;
|
||||
|
||||
// Cumulative cost: entries up to the end of this row plus the fixed cost
// of the iRow preceding rows.
int_type count = row_offsets(iRow+1) + cost_per_row*iRow;
|
||||
|
||||
// The last row always writes the final boundary (one past the last row).
if(iRow == num_rows-1) row_block_offsets(num_blocks) = num_rows;
|
||||
|
||||
// Always-taken branch; it only introduces a scope.
if(true) {
|
||||
// NOTE(review): count-row_cost-cost_per_row evaluates to
// row_offsets(iRow) + cost_per_row*(iRow-2), i.e. cost_per_row is
// subtracted twice. If int_type is unsigned and cost_per_row > 0 this can
// wrap around for small iRow -- confirm this is intended.
int_type current_block = (count-row_cost-cost_per_row)/cost_per_workset;
|
||||
// Block index that the cumulative cost including this row falls into.
int_type end_block = count/cost_per_workset;
|
||||
|
||||
// Handle some corner cases for the last two blocks.
|
||||
if(current_block >= num_blocks-2) {
|
||||
// Only the second-to-last block acts here, and only once the
// cumulative cost has reached the next block's threshold.
if((current_block == num_blocks-2) && (count >= (current_block + 1) * cost_per_workset)) {
|
||||
int_type row = iRow;
|
||||
int_type cc = count-row_cost-cost_per_row;
|
||||
int_type block = cc/cost_per_workset;
|
||||
// Walk backwards over rows while the cumulative cost at the start of
// the row still maps to current_block (stopping at block 0).
while((block>0) && (block==current_block)) {
|
||||
cc = row_offsets(row)+row*cost_per_row;
|
||||
block = cc/cost_per_workset;
|
||||
row--;
|
||||
}
|
||||
// Compare the cost between cc and this row (appears to be the cost
// already gathered in the current block) with the entries remaining
// after this row, to choose whether this row stays in the current
// block (boundary iRow+1) or starts the next one (boundary iRow).
if((count-cc-row_cost-cost_per_row) < num_entries-row_offsets(iRow+1)) {
|
||||
row_block_offsets(current_block+1) = iRow+1;
|
||||
} else {
|
||||
row_block_offsets(current_block+1) = iRow;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// A boundary is written when the cumulative cost reaches the next
// block's threshold, or when iRow is the last row.
if((count >= (current_block + 1) * cost_per_workset) ||
|
||||
(iRow+2 == row_offsets.dimension_0())) {
|
||||
// This single row spans more than one block boundary.
if(end_block>current_block+1) {
|
||||
int_type num_block = end_block-current_block;
|
||||
row_block_offsets(current_block+1) = iRow;
|
||||
// Of the remaining boundaries crossed by this row, the earlier ones
// are placed before the row (iRow) and the later ones after it
// (iRow+1); the switch point is current_block+2+(num_block-1)/2.
for(int_type block = current_block+2; block <= end_block; block++)
|
||||
if((block<current_block+2+(num_block-1)/2))
|
||||
row_block_offsets(block) = iRow;
|
||||
else
|
||||
row_block_offsets(block) = iRow+1;
|
||||
} else {
|
||||
// Exactly one boundary is crossed: the next block starts after this row.
row_block_offsets(current_block+1) = iRow+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// \class StaticCrsGraph
|
||||
/// \brief Compressed row storage array.
|
||||
///
|
||||
@ -100,19 +174,23 @@ public:
|
||||
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
|
||||
typedef View< const size_type* , array_layout, device_type > row_map_type;
|
||||
typedef View< DataType* , array_layout, device_type > entries_type;
|
||||
typedef View< const size_type* , array_layout, device_type > row_block_type;
|
||||
|
||||
entries_type entries;
|
||||
row_map_type row_map;
|
||||
row_block_type row_block_offsets;
|
||||
|
||||
//! Construct an empty view.
|
||||
StaticCrsGraph () : entries(), row_map() {}
|
||||
StaticCrsGraph () : entries(), row_map(), row_block_offsets() {}
|
||||
|
||||
//! Copy constructor (shallow copy).
|
||||
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
|
||||
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map),
|
||||
row_block_offsets(rhs.row_block_offsets)
|
||||
{}
|
||||
|
||||
template<class EntriesType, class RowMapType>
|
||||
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
|
||||
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_),
|
||||
row_block_offsets()
|
||||
{}
|
||||
|
||||
/** \brief Assign to a view of the rhs array.
|
||||
@ -122,6 +200,7 @@ public:
|
||||
// Member-wise assignment: assigns the entries, row_map and row_block_offsets
// views of rhs to this object's members and returns *this.
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
|
||||
entries = rhs.entries;
|
||||
row_map = rhs.row_map;
|
||||
row_block_offsets = rhs.row_block_offsets;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -130,12 +209,30 @@ public:
|
||||
*/
|
||||
~StaticCrsGraph() {}
|
||||
|
||||
/** \brief Return number of rows in the graph
|
||||
*
* The result is row_map.dimension_0() - 1 when row_map has a non-zero
* extent, and 0 when row_map has extent 0.
*/
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_type numRows() const {
|
||||
// The explicit zero check keeps the subtraction from being evaluated on an
// empty row_map.
return (row_map.dimension_0 () != 0) ?
|
||||
row_map.dimension_0 () - static_cast<size_type> (1) :
|
||||
static_cast<size_type> (0);
|
||||
}
|
||||
|
||||
/** \brief Create a row partitioning into a given number of blocks
|
||||
* balancing non-zeros + a fixed cost per row.
|
||||
*
* \param num_blocks        Number of blocks; num_blocks+1 offsets are allocated.
* \param fix_cost_per_row  Fixed cost added to every row's entry count
*                          (defaults to 4).
*
* The offsets are computed by Impl::StaticCrsGraphBalancerFunctor, run once
* per row, and the result is stored in the row_block_offsets member.
*/
|
||||
void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row = 4) {
|
||||
// Writable scratch view; row_block_offsets itself has a const value type.
// NOTE(review): the label spells "StatisCrsGraph" (likely meant
// "StaticCrsGraph"); it is a runtime string, so it is left as-is here.
View< size_type* , array_layout, device_type >
|
||||
block_offsets("StatisCrsGraph::load_balance_offsets",num_blocks+1);
|
||||
|
||||
Impl::StaticCrsGraphBalancerFunctor<row_map_type,View< size_type* , array_layout, device_type > >
|
||||
partitioner(row_map,block_offsets,fix_cost_per_row,num_blocks);
|
||||
|
||||
// One functor invocation per row index in [0, numRows()).
Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,numRows()),partitioner);
|
||||
// Fence before publishing the offsets through the member view.
Kokkos::fence();
|
||||
|
||||
row_block_offsets = block_offsets;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -72,7 +72,7 @@ private:
|
||||
|
||||
|
||||
public:
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifdef KOKKOS_ENABLE_CUDA_UVM
|
||||
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
|
||||
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
|
||||
#else
|
||||
|
||||
@ -133,11 +133,11 @@ uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
|
||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||
defined( __clang__ )
|
||||
|
||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_MAY_ALIAS
|
||||
#define KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
@ -145,10 +145,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -188,7 +188,7 @@ bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
|
||||
|
||||
|
||||
|
||||
#undef KOKKOS_MAY_ALIAS
|
||||
#undef KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
@ -69,15 +69,17 @@ create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view
|
||||
|
||||
typename staticcrsgraph_type::HostMirror tmp ;
|
||||
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
|
||||
typename staticcrsgraph_type::row_block_type::HostMirror tmp_row_block_offsets = create_mirror( view.row_block_offsets);
|
||||
|
||||
// Allocation to match:
|
||||
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
|
||||
tmp.entries = create_mirror( view.entries );
|
||||
|
||||
tmp.row_block_offsets = tmp_row_block_offsets ; // Assignment of 'const' from 'non-const'
|
||||
|
||||
// Deep copy:
|
||||
deep_copy( tmp_row_map , view.row_map );
|
||||
deep_copy( tmp.entries , view.entries );
|
||||
deep_copy( tmp_row_block_offsets , view.row_block_offsets );
|
||||
|
||||
return tmp ;
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
namespace Test {
|
||||
|
||||
@ -96,6 +96,18 @@ TEST_F( cuda , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 100000);
|
||||
}
|
||||
|
||||
|
||||
@ -225,5 +237,5 @@ TEST_F(cuda, ErrorReporter)
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
||||
#endif /* #ifdef KOKKOS_ENABLE_CUDA */
|
||||
|
||||
|
||||
@ -1298,7 +1298,7 @@ public:
|
||||
// For CUDA the constant random access View does not return
|
||||
// an lvalue reference due to retrieving through texture cache
|
||||
// therefore not allowed to query the underlying pointer.
|
||||
#if defined(KOKKOS_HAVE_CUDA)
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
if ( ! std::is_same< typename device::execution_space , Kokkos::Cuda >::value )
|
||||
#endif
|
||||
{
|
||||
@ -1408,7 +1408,7 @@ public:
|
||||
ASSERT_EQ( ds5.dimension_4() , ds5plus.dimension_4() );
|
||||
ASSERT_EQ( ds5.dimension_5() , ds5plus.dimension_5() );
|
||||
|
||||
#if ! defined( KOKKOS_HAVE_CUDA ) || defined ( KOKKOS_USE_CUDA_UVM )
|
||||
#if ! defined( KOKKOS_ENABLE_CUDA ) || defined ( KOKKOS_ENABLE_CUDA_UVM )
|
||||
ASSERT_EQ( & ds5(1,1,1,1,0) - & ds5plus(1,1,1,1,0) , 0 );
|
||||
ASSERT_EQ( & ds5(1,1,1,1,0,0) - & ds5plus(1,1,1,1,0,0) , 0 ); // passing argument to rank beyond the view's rank is allowed iff it is a 0.
|
||||
#endif
|
||||
|
||||
@ -200,7 +200,7 @@ struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase<DeviceType>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos::OpenMP>
|
||||
{
|
||||
typedef ErrorReporterDriverBase<Kokkos::OpenMP> driver_base;
|
||||
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
class openmp : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
@ -109,6 +109,18 @@ TEST_F( openmp , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 100000);
|
||||
}
|
||||
|
||||
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if ! defined(KOKKOS_HAVE_SERIAL)
|
||||
#if ! defined(KOKKOS_ENABLE_SERIAL)
|
||||
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
|
||||
#else
|
||||
|
||||
@ -91,6 +91,18 @@ TEST_F( serial , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 100000);
|
||||
}
|
||||
|
||||
TEST_F( serial, complex )
|
||||
@ -178,6 +190,6 @@ TEST_F(serial, ErrorReporter)
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#endif // KOKKOS_HAVE_SERIAL
|
||||
#endif // KOKKOS_ENABLE_SERIAL
|
||||
|
||||
|
||||
|
||||
@ -144,6 +144,44 @@ void run_test_graph2()
|
||||
}
|
||||
}
|
||||
|
||||
// Test for StaticCrsGraph::create_block_partitioning.
// Builds a 2000-row graph with pseudo-random row lengths (fixed seed), forces
// two rows (indices 1 and 1998) to length N, partitions the rows into B
// blocks, and checks on the host mirror that no block holding more than one
// row exceeds twice the average block cost.
//   B - number of blocks requested
//   N - length assigned to the two forced rows
template< class Space >
|
||||
void run_test_graph3(size_t B, size_t N)
|
||||
{
|
||||
// Fixed seed so the generated row lengths are reproducible.
srand(10310);
|
||||
|
||||
typedef Kokkos::StaticCrsGraph< int , Space > dView ;
|
||||
typedef typename dView::HostMirror hView ;
|
||||
|
||||
const unsigned LENGTH = 2000 ;
|
||||
|
||||
std::vector< size_t > sizes( LENGTH );
|
||||
|
||||
// NOTE(review): total_length is accumulated below but not read afterwards
// in this function.
size_t total_length = 0 ;
|
||||
|
||||
// Row lengths in [0, 999].
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||
sizes[i] = rand()%1000;
|
||||
}
|
||||
|
||||
// Force one row near each end to length N; index 1998 is hard-coded
// relative to LENGTH = 2000.
sizes[1] = N;
|
||||
sizes[1998] = N;
|
||||
|
||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||
total_length += sizes[i];
|
||||
}
|
||||
|
||||
// Fixed per-row cost passed to the partitioner and used in the check below.
int C = 0;
|
||||
dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
|
||||
dx.create_block_partitioning(B,C);
|
||||
hView hx = Kokkos::create_mirror( dx );
|
||||
|
||||
for( size_t i = 0; i<B; i++) {
|
||||
// Cost of block i: entries of each row in the block plus C per row.
size_t ne = 0;
|
||||
for(size_t j = hx.row_block_offsets(i); j<hx.row_block_offsets(i+1); j++)
|
||||
ne += hx.row_map(j+1)-hx.row_map(j)+C;
|
||||
|
||||
// Fail only if the block is both more than twice the (integer-divided)
// average block cost and made of more than one row; a single oversized
// row is allowed to exceed the bound.
ASSERT_FALSE((ne>2*((hx.row_map(hx.numRows())+C*hx.numRows())/B))&&(hx.row_block_offsets(i+1)>hx.row_block_offsets(i)+1));
|
||||
}
|
||||
}
|
||||
} /* namespace TestStaticCrsGraph */
|
||||
|
||||
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
#include <Kokkos_Bitset.hpp>
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
@ -106,6 +106,18 @@ TEST_F( threads , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::Threads >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::Threads >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 100000);
|
||||
}
|
||||
|
||||
/*TEST_F( threads, bitset )
|
||||
@ -190,5 +202,5 @@ TEST_F(threads, ErrorReporter)
|
||||
} // namespace Test
|
||||
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
@ -185,5 +185,5 @@ TEST_F( cuda, texture_double )
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
|
||||
|
||||
@ -51,8 +51,8 @@
|
||||
// macro, so I'm commenting out the macro to avoid compiler complaints
|
||||
// about an unused macro.
|
||||
|
||||
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
|
||||
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
|
||||
// #define KOKKOS_IMPL_MACRO_TO_STRING( X ) #X
|
||||
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_IMPL_MACRO_TO_STRING( X )
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -45,17 +45,17 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
|
||||
typedef Kokkos::OpenMP TestHostDevice ;
|
||||
const char TestHostDeviceName[] = "Kokkos::OpenMP" ;
|
||||
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
typedef Kokkos::Threads TestHostDevice ;
|
||||
const char TestHostDeviceName[] = "Kokkos::Threads" ;
|
||||
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
typedef Kokkos::Serial TestHostDevice ;
|
||||
const char TestHostDeviceName[] = "Kokkos::Serial" ;
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <string>
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
@ -112,7 +112,7 @@ CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type
|
||||
#if defined( __CUDACC__ )
|
||||
|
||||
/** \brief Access to constant memory on the device */
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
|
||||
__device__ __constant__
|
||||
extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ;
|
||||
@ -135,7 +135,7 @@ namespace Impl {
|
||||
}
|
||||
}
|
||||
__device__ __constant__
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
|
||||
@ -245,7 +245,7 @@ struct CudaParallelLaunch< DriverType , true > {
|
||||
// Copy functor to constant memory on the device
|
||||
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
|
||||
|
||||
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -287,7 +287,7 @@ struct CudaParallelLaunch< DriverType , false > {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -314,5 +314,5 @@ struct CudaParallelLaunch< DriverType , false > {
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* defined( __CUDACC__ ) */
|
||||
#endif /* defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */
|
||||
|
||||
@ -50,7 +50,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
@ -910,5 +910,5 @@ void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
|
||||
@ -176,7 +176,7 @@ public:
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif //KOKKOS_HAVE_CUDA
|
||||
#endif //KOKKOS_ENABLE_CUDA
|
||||
|
||||
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
@ -65,5 +65,5 @@ inline void cuda_internal_safe_call( cudaError e , const char * name, const char
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif //KOKKOS_HAVE_CUDA
|
||||
#endif //KOKKOS_ENABLE_CUDA
|
||||
#endif //KOKKOS_CUDA_ERROR_HPP
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||
#include <Cuda/Kokkos_Cuda_Internal.hpp>
|
||||
@ -64,7 +64,7 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
|
||||
__device__ __constant__
|
||||
unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ;
|
||||
@ -299,8 +299,8 @@ void CudaInternal::print_configuration( std::ostream & s ) const
|
||||
{
|
||||
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
s << "macro KOKKOS_ENABLE_CUDA : defined" << std::endl ;
|
||||
#endif
|
||||
#if defined( CUDA_VERSION )
|
||||
s << "macro CUDA_VERSION = " << CUDA_VERSION
|
||||
@ -500,7 +500,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifdef KOKKOS_ENABLE_CUDA_UVM
|
||||
if(!cuda_launch_blocking()) {
|
||||
std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
|
||||
std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl;
|
||||
@ -531,7 +531,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
|
||||
// Init the array for used for arbitrarily sized atomics
|
||||
Impl::init_lock_arrays_cuda_space();
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -773,6 +773,6 @@ void Cuda::fence()
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||
|
||||
@ -197,6 +197,6 @@ struct CudaGetOptBlockSize<DriverType,false> {
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <utility>
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <utility>
|
||||
|
||||
@ -312,7 +312,7 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
|
||||
( rtid_intra & 16 ) ? 16 : 0 ))));
|
||||
|
||||
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n");
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
|
||||
@ -294,5 +294,5 @@ namespace Impl {
|
||||
|
||||
}
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
#endif
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -144,7 +144,7 @@ struct CudaTextureFetch {
|
||||
{}
|
||||
};
|
||||
|
||||
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
|
||||
|
||||
template< typename ValueType , typename AliasType >
|
||||
struct CudaLDGFetch {
|
||||
@ -261,7 +261,7 @@ public:
|
||||
>::type
|
||||
>::type ;
|
||||
|
||||
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
|
||||
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
|
||||
#else
|
||||
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
|
||||
@ -301,6 +301,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#include "Kokkos_Macros.hpp"
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
@ -82,6 +82,6 @@ void cuda_abort( const char * const message )
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */
|
||||
|
||||
|
||||
@ -48,8 +48,8 @@
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
#include <initializer_list>
|
||||
|
||||
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_MDRANGE_IVDEP
|
||||
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_IMPL_MDRANGE_IVDEP
|
||||
#endif
|
||||
|
||||
namespace Kokkos { namespace Experimental {
|
||||
@ -350,7 +350,7 @@ struct MDForFunctor
|
||||
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
@ -358,7 +358,7 @@ struct MDForFunctor
|
||||
}}
|
||||
} else {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -396,7 +396,7 @@ struct MDForFunctor
|
||||
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
@ -404,7 +404,7 @@ struct MDForFunctor
|
||||
}}
|
||||
} else {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -501,7 +501,7 @@ struct MDForFunctor
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
@ -510,7 +510,7 @@ struct MDForFunctor
|
||||
} else {
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -555,7 +555,7 @@ struct MDForFunctor
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
@ -564,7 +564,7 @@ struct MDForFunctor
|
||||
} else {
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_ARRAY
|
||||
#define KOKKOS_ARRAY
|
||||
#ifndef KOKKOS_ARRAY_HPP
|
||||
#define KOKKOS_ARRAY_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
@ -298,5 +298,5 @@ public:
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_ARRAY */
|
||||
#endif /* #ifndef KOKKOS_ARRAY_HPP */
|
||||
|
||||
|
||||
@ -73,18 +73,18 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
#if defined(_WIN32)
|
||||
#define KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#define KOKKOS_ENABLE_WINDOWS_ATOMICS
|
||||
#else
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
// Compiling NVIDIA device code, must use Cuda atomics:
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_CUDA
|
||||
#define KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
#endif
|
||||
|
||||
#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
|
||||
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
|
||||
! defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
|
||||
! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \
|
||||
! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
// Compiling for non-Cuda atomic implementation has not been pre-selected.
|
||||
// Choose the best implementation for the detected compiler.
|
||||
@ -94,16 +94,16 @@
|
||||
defined( KOKKOS_COMPILER_CLANG ) || \
|
||||
( defined ( KOKKOS_COMPILER_NVCC ) )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_GCC
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS
|
||||
|
||||
#elif defined( KOKKOS_COMPILER_INTEL ) || \
|
||||
defined( KOKKOS_COMPILER_CRAYC )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_INTEL
|
||||
#define KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
|
||||
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_OMP31
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
|
||||
#else
|
||||
|
||||
@ -119,7 +119,7 @@
|
||||
// Forward declaration of functions supporting arbitrary sized atomics
|
||||
// This is necessary since Kokkos_Atomic.hpp is internally included very early
|
||||
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
/// \brief Acquire a lock for the address
|
||||
@ -127,7 +127,7 @@ namespace Impl {
|
||||
/// This function tries to acquire the lock for the hash value derived
|
||||
/// from the provided ptr. If the lock is successfully acquired the
|
||||
/// function returns true. Otherwise it returns false.
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
__device__ inline
|
||||
@ -139,7 +139,7 @@ bool lock_address_cuda_space(void* ptr);
|
||||
/// from the provided ptr. This function should only be called
|
||||
/// after previously successfully acquiring a lock with
|
||||
/// lock_address.
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
__device__ inline
|
||||
@ -170,16 +170,16 @@ namespace Kokkos {
|
||||
inline
|
||||
const char * atomic_query_version()
|
||||
{
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
return "KOKKOS_ATOMICS_USE_CUDA" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
return "KOKKOS_ATOMICS_USE_GCC" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
|
||||
return "KOKKOS_ATOMICS_USE_INTEL" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
return "KOKKOS_ATOMICS_USE_OMP31" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
|
||||
return "KOKKOS_ATOMICS_USE_WINDOWS";
|
||||
#if defined( KOKKOS_ENABLE_CUDA_ATOMICS )
|
||||
return "KOKKOS_ENABLE_CUDA_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
return "KOKKOS_ENABLE_GNU_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
return "KOKKOS_ENABLE_INTEL_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
return "KOKKOS_ENABLE_OPENMP_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
return "KOKKOS_ENABLE_WINDOWS_ATOMICS";
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -185,15 +185,15 @@ public:
|
||||
|
||||
typedef typename std::conditional
|
||||
< std::is_same< memory_space , Kokkos::HostSpace >::value
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|| std::is_same< memory_space , Kokkos::CudaUVMSpace >::value
|
||||
|| std::is_same< memory_space , Kokkos::CudaHostPinnedSpace >::value
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
, memory_space
|
||||
, Kokkos::HostSpace
|
||||
>::type host_memory_space ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
typedef typename std::conditional
|
||||
< std::is_same< execution_space , Kokkos::Cuda >::value
|
||||
, Kokkos::DefaultHostExecutionSpace , execution_space
|
||||
|
||||
@ -49,19 +49,19 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
#include <Kokkos_Serial.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
#include <Kokkos_OpenMP.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
#include <Kokkos_Threads.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
#endif
|
||||
|
||||
@ -74,9 +74,7 @@
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
#include <Kokkos_Complex.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -83,25 +83,25 @@ namespace Kokkos {
|
||||
|
||||
class HostSpace ; ///< Memory space for main process and CPU execution spaces
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
namespace Experimental {
|
||||
class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
class Serial ; ///< Execution space main process on CPU
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
class Threads ; ///< Execution space with pthreads back-end
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
class OpenMP ; ///< OpenMP execution space
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
class CudaSpace ; ///< Memory space on Cuda GPU
|
||||
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
|
||||
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
|
||||
@ -122,29 +122,29 @@ struct Device;
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
typedef Cuda DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef OpenMP DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Threads DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
typedef Serial DefaultExecutionSpace ;
|
||||
#else
|
||||
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||
#endif
|
||||
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef OpenMP DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Threads DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
typedef Serial DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_OPENMP )
|
||||
typedef OpenMP DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined ( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Threads DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Serial DefaultHostExecutionSpace ;
|
||||
#else
|
||||
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||
@ -161,7 +161,7 @@ namespace Kokkos {
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA)
|
||||
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
|
||||
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
|
||||
// If CUDA execution space is enabled then use this header file.
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
@ -94,7 +94,7 @@ public:
|
||||
//! Tag this class as a kokkos execution space
|
||||
typedef Cuda execution_space ;
|
||||
|
||||
#if defined( KOKKOS_USE_CUDA_UVM )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_UVM )
|
||||
//! This execution space's preferred memory space.
|
||||
typedef CudaUVMSpace memory_space ;
|
||||
#else
|
||||
@ -240,7 +240,7 @@ struct MemorySpaceAccess
|
||||
enum { deepcopy = false };
|
||||
};
|
||||
|
||||
#if defined( KOKKOS_USE_CUDA_UVM )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_UVM )
|
||||
|
||||
// If forcing use of UVM everywhere
|
||||
// then must assume that CudaUVMSpace
|
||||
@ -297,7 +297,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_HPP */
|
||||
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <iosfwd>
|
||||
#include <typeinfo>
|
||||
@ -939,6 +939,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #define KOKKOS_CUDASPACE_HPP */
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
@ -102,15 +102,15 @@ public:
|
||||
/// Every memory space has a default execution space. This is
|
||||
/// useful for things like initializing a View (which happens in
|
||||
/// parallel using the View's default execution space).
|
||||
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
#else
|
||||
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
|
||||
|
||||
@ -108,15 +108,15 @@ public:
|
||||
/// Every memory space has a default execution space. This is
|
||||
/// useful for things like initializing a View (which happens in
|
||||
/// parallel using the View's default execution space).
|
||||
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
#else
|
||||
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
|
||||
|
||||
@ -47,23 +47,24 @@
|
||||
//----------------------------------------------------------------------------
|
||||
/** Pick up configure/build options via #define macros:
|
||||
*
|
||||
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
|
||||
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
|
||||
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
|
||||
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
|
||||
* KOKKOS_HAVE_HWLOC HWLOC library is available
|
||||
* KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces
|
||||
* KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space
|
||||
* KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space
|
||||
* KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space
|
||||
* KOKKOS_ENABLE_HWLOC HWLOC library is available
|
||||
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
|
||||
* KOKKOS_HAVE_CXX11 enable C++11 features
|
||||
*
|
||||
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
|
||||
* KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions
|
||||
*
|
||||
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
|
||||
* KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
|
||||
#include <KokkosCore_config.h>
|
||||
#endif
|
||||
|
||||
#include <impl/Kokkos_OldMacros.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Pick up compiler specific #define macros:
|
||||
*
|
||||
@ -80,10 +81,10 @@
|
||||
*
|
||||
* Macros for which compiler extension to use for atomics on intrinsic types
|
||||
*
|
||||
* KOKKOS_ATOMICS_USE_CUDA
|
||||
* KOKKOS_ATOMICS_USE_GNU
|
||||
* KOKKOS_ATOMICS_USE_INTEL
|
||||
* KOKKOS_ATOMICS_USE_OPENMP31
|
||||
* KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
* KOKKOS_ENABLE_GNU_ATOMICS
|
||||
* KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
* KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
*
|
||||
* A suite of 'KOKKOS_ENABLE_PRAGMA_...' macros is defined for internal use.
|
||||
*
|
||||
@ -96,7 +97,7 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
|
||||
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
|
||||
|
||||
/* Compiling with a CUDA compiler.
|
||||
*
|
||||
@ -126,7 +127,7 @@
|
||||
#error "Cuda device capability >= 3.0 is required"
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LAMBDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#if ( CUDA_VERSION < 7050 )
|
||||
// CUDA supports C++11 lambdas generated in host code to be given
|
||||
// to the device starting with version 7.5. But the release candidate (7.5.6)
|
||||
@ -137,18 +138,18 @@
|
||||
#define KOKKOS_LAMBDA [=]__device__
|
||||
#else
|
||||
#define KOKKOS_LAMBDA [=]__host__ __device__
|
||||
#if defined( KOKKOS_HAVE_CXX1Z )
|
||||
#if defined( KOKKOS_ENABLE_CXX1Z )
|
||||
#define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__
|
||||
#endif
|
||||
#endif
|
||||
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
|
||||
#endif
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */
|
||||
|
||||
|
||||
#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
|
||||
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
|
||||
// Cuda version 8.0 still needs the functor wrapper
|
||||
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
|
||||
#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
|
||||
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
|
||||
#endif
|
||||
#endif
|
||||
@ -156,7 +157,7 @@
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/* Language info: C++, CUDA, OPENMP */
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// Compiling Cuda code to 'ptx'
|
||||
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
|
||||
@ -185,21 +186,21 @@
|
||||
#define KOKKOS_COMPILER_NVCC __NVCC__
|
||||
|
||||
#else
|
||||
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
|
||||
#if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
|
||||
#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
|
||||
#if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
|
||||
// CUDA (including version 6.5) does not support giving lambdas as
|
||||
// arguments to global functions. Thus it is not currently possible
|
||||
// to dispatch lambdas from the host.
|
||||
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
|
||||
#endif
|
||||
#endif
|
||||
#endif /* #if defined( __NVCC__ ) */
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
|
||||
#if !defined (KOKKOS_LAMBDA)
|
||||
#define KOKKOS_LAMBDA [=]
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
|
||||
#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
|
||||
#define KOKKOS_CLASS_LAMBDA [=,*this]
|
||||
#endif
|
||||
|
||||
@ -259,11 +260,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_INTEL )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#define KOKKOS_RESTRICT __restrict__
|
||||
|
||||
@ -317,11 +318,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_IBM )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#endif
|
||||
|
||||
@ -330,11 +331,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_CLANG )
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
|
||||
@ -347,11 +348,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_GNU )
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
|
||||
@ -371,11 +372,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_PGI )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#endif
|
||||
|
||||
@ -384,7 +385,7 @@
|
||||
#if defined( KOKKOS_COMPILER_NVCC )
|
||||
|
||||
#if defined(__CUDA_ARCH__ )
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -426,19 +427,15 @@
|
||||
#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size)))
|
||||
#endif
|
||||
|
||||
#if ! defined(KOKKOS_ALIGN_16)
|
||||
#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16)
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Determine the default execution space for parallel dispatch.
|
||||
* There is zero or one default execution space specified.
|
||||
*/
|
||||
|
||||
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
|
||||
#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
|
||||
|
||||
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
|
||||
|
||||
@ -447,24 +444,24 @@
|
||||
/** If default is not specified then choose from enabled execution spaces.
|
||||
* Priority: CUDA, OPENMP, THREADS, SERIAL
|
||||
*/
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_HAVE_CUDA )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#elif defined ( KOKKOS_HAVE_OPENMP )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#elif defined ( KOKKOS_HAVE_PTHREAD )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_CUDA )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#elif defined ( KOKKOS_ENABLE_OPENMP )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#elif defined ( KOKKOS_ENABLE_PTHREAD )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#else
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Determine for what space the code is being compiled: */
|
||||
|
||||
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
|
||||
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA)
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
|
||||
#else
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
@ -476,7 +473,7 @@
|
||||
#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
|
||||
( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
|
||||
#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
|
||||
#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1
|
||||
#define KOKKOS_ENABLE_POSIX_MEMALIGN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -489,15 +486,6 @@
|
||||
#define KOKKOS_ENABLE_PROFILING 1
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
/* Transitional macro to change between old and new View
|
||||
* are no longer supported.
|
||||
*/
|
||||
|
||||
#define KOKKOS_USING_EXP_VIEW 1
|
||||
#define KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -57,18 +57,18 @@
|
||||
|
||||
// How should errors be handled? In general, production code should return a
|
||||
// value indicating failure so the user can decide how the error is handled.
|
||||
// While experimental, code can abort instead. If KOKKOS_MEMPOOL_PRINTERR is
|
||||
// While experimental, code can abort instead. If KOKKOS_ENABLE_MEMPOOL_PRINTERR is
|
||||
// defined, the code will abort with an error message. Otherwise, the code will
|
||||
// return with a value indicating failure when possible, or do nothing instead.
|
||||
//#define KOKKOS_MEMPOOL_PRINTERR
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
|
||||
//#define KOKKOS_MEMPOOL_PRINT_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
//#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -451,7 +451,7 @@ struct create_histogram {
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
template < typename UInt32View, typename SBHeaderView, typename MempoolBitset >
|
||||
struct count_allocated_blocks {
|
||||
typedef typename UInt32View::execution_space execution_space;
|
||||
@ -790,7 +790,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
printf( "\n" );
|
||||
printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size );
|
||||
printf( " m_sb_size: %12lu\n", m_sb_size );
|
||||
@ -810,7 +810,7 @@ public:
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
// Print the blocksize info for all the block sizes.
|
||||
printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" );
|
||||
for ( size_t i = 0; i < m_num_block_size; ++i ) {
|
||||
@ -845,7 +845,7 @@ public:
|
||||
uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb;
|
||||
uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb;
|
||||
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
// Without this test it looks like pages_per_sb might come back wrong.
|
||||
if ( pages_per_sb == 0 ) return NULL;
|
||||
#endif
|
||||
@ -966,7 +966,7 @@ public:
|
||||
|
||||
if ( new_sb_id == sb_id ) {
|
||||
allocation_done = true;
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
printf( "** No superblocks available. **\n" );
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
fflush( stdout );
|
||||
@ -979,7 +979,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
else {
|
||||
printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n",
|
||||
alloc_size, m_sb_size );
|
||||
@ -1068,7 +1068,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
else {
|
||||
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
|
||||
reinterpret_cast<uint64_t>( alloc_ptr ) );
|
||||
@ -1109,7 +1109,7 @@ public:
|
||||
{
|
||||
printf( "\n" );
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header );
|
||||
deep_copy( host_sb_header, m_sb_header );
|
||||
|
||||
@ -1188,7 +1188,7 @@ public:
|
||||
num_active_sb += host_active(i) != INVALID_SUPERBLOCK;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
// Print active superblocks.
|
||||
printf( "BS_ID SB_ID\n" );
|
||||
for ( size_t i = 0; i < m_num_block_size; ++i ) {
|
||||
@ -1208,7 +1208,7 @@ public:
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
// Print the summary page histogram.
|
||||
printf( "USED_BLOCKS PAGE_COUNT\n" );
|
||||
for ( uint32_t i = 0; i < 33; ++i ) {
|
||||
@ -1217,7 +1217,7 @@ public:
|
||||
printf( "\n" );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
// Print the page histogram for a few individual superblocks.
|
||||
// const uint32_t num_sb_id = 2;
|
||||
// uint32_t sb_id[num_sb_id] = { 0, 10 };
|
||||
@ -1484,7 +1484,7 @@ private:
|
||||
// 1. An invalid superblock should never be found here.
|
||||
// 2. If the new superblock is the same as the previous superblock, the
|
||||
// allocator is empty.
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
if ( new_sb == INVALID_SUPERBLOCK ) {
|
||||
printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" );
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
@ -1531,28 +1531,28 @@ private:
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#undef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#endif
|
||||
|
||||
#endif // KOKKOS_MEMORYPOOL_HPP
|
||||
|
||||
@ -46,14 +46,18 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP) && !defined(_OPENMP)
|
||||
#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!"
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP )
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <iosfwd>
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#include <Kokkos_HBWSpace.hpp>
|
||||
#endif
|
||||
#include <Kokkos_ScratchSpace.hpp>
|
||||
@ -77,7 +81,7 @@ public:
|
||||
|
||||
//! Tag this class as a kokkos execution space
|
||||
typedef OpenMP execution_space ;
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
typedef Experimental::HBWSpace memory_space ;
|
||||
#else
|
||||
typedef HostSpace memory_space ;
|
||||
@ -194,7 +198,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */
|
||||
#endif /* #ifndef KOKKOS_OPENMP_HPP */
|
||||
|
||||
|
||||
|
||||
@ -61,7 +61,7 @@
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
#include<iostream>
|
||||
#endif
|
||||
|
||||
|
||||
@ -978,7 +978,7 @@ struct ParallelReduceReturnValue<typename std::enable_if<Kokkos::is_view<ReturnT
|
||||
typedef InvalidType reducer_type;
|
||||
|
||||
typedef typename return_type::value_type value_type_scalar;
|
||||
typedef typename return_type::value_type value_type_array[];
|
||||
typedef typename return_type::value_type* const value_type_array;
|
||||
|
||||
typedef typename if_c<return_type::rank==0,value_type_scalar,value_type_array>::type value_type;
|
||||
|
||||
|
||||
@ -106,14 +106,14 @@ public:
|
||||
void* tmp = m_iter_L0 + m_offset * align (size);
|
||||
if (m_end_L0 < (m_iter_L0 += align (size) * m_multiplier)) {
|
||||
m_iter_L0 -= align (size) * m_multiplier; // put it back like it was
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||
// in a CUDA build, so only print in debug mode. The
|
||||
// function still returns NULL if not enough memory.
|
||||
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
|
||||
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
|
||||
long(m_end_L0-m_iter_L0));
|
||||
#endif // KOKKOS_HAVE_DEBUG
|
||||
#endif // KOKKOS_DEBUG
|
||||
tmp = 0;
|
||||
}
|
||||
return tmp;
|
||||
@ -121,14 +121,14 @@ public:
|
||||
void* tmp = m_iter_L1 + m_offset * align (size);
|
||||
if (m_end_L1 < (m_iter_L1 += align (size) * m_multiplier)) {
|
||||
m_iter_L1 -= align (size) * m_multiplier; // put it back like it was
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||
// in a CUDA build, so only print in debug mode. The
|
||||
// function still returns NULL if not enough memory.
|
||||
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
|
||||
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
|
||||
long(m_end_L1-m_iter_L1));
|
||||
#endif // KOKKOS_HAVE_DEBUG
|
||||
#endif // KOKKOS_DEBUG
|
||||
tmp = 0;
|
||||
}
|
||||
return tmp;
|
||||
|
||||
@ -61,7 +61,7 @@
|
||||
|
||||
#include <KokkosExp_MDRangePolicy.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -1005,7 +1005,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -1021,7 +1021,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1044,7 +1044,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::S
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1075,7 +1075,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ser
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1116,7 +1116,7 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func
|
||||
|
||||
#include <impl/Kokkos_Serial_Task.hpp>
|
||||
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
#endif /* #define KOKKOS_SERIAL_HPP */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -52,9 +52,9 @@
|
||||
// and use relocatable device code to enable the task policy.
|
||||
// nvcc relocatable device code option: --relocatable-device-code=true
|
||||
|
||||
#if ( defined( KOKKOS_HAVE_CUDA ) )
|
||||
#if ( defined( KOKKOS_ENABLE_CUDA ) )
|
||||
#if ( 8000 <= CUDA_VERSION ) && \
|
||||
defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
|
||||
defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE )
|
||||
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
|
||||
@ -63,7 +63,6 @@
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
#endif
|
||||
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -90,6 +89,34 @@ class TaskScheduler ;
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/** \brief Implementation data for task data management, access, and execution.
|
||||
*
|
||||
* CRTP Inheritance structure to allow static_cast from the
|
||||
* task root type and a task's FunctorType.
|
||||
*
|
||||
* TaskBase< Space , ResultType , FunctorType >
|
||||
* : TaskBase< Space , ResultType , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
*
|
||||
* TaskBase< Space , ResultType , void >
|
||||
* : TaskBase< Space , void , void >
|
||||
* { ... };
|
||||
*
|
||||
* Only the primary template is forward-declared here; the
|
||||
* specializations sketched above are not defined in this block.
|
||||
*/
|
||||
template< typename Space , typename ResultType , typename FunctorType >
|
||||
class TaskBase ;
|
||||
|
||||
// Forward declaration only; no definition appears in this block.
// NOTE(review): presumably the per-execution-space task executor -- confirm
// against the implementation header.
template< typename Space >
|
||||
class TaskExec ;
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/**
|
||||
@ -302,14 +329,6 @@ enum TaskPriority { TaskHighPriority = 0
|
||||
template< typename Space >
|
||||
void wait( TaskScheduler< Space > const & );
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -363,20 +382,7 @@ private:
|
||||
, Future< A1 , A2 > const & arg
|
||||
, Options const & ... opts )
|
||||
{
|
||||
// Assign dependence to task->m_next
|
||||
// which will be processed within subsequent call to schedule.
|
||||
// Error if the dependence is reset.
|
||||
|
||||
if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != arg.m_task ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(arg.m_task->m_ref_count) );
|
||||
}
|
||||
|
||||
task->add_dependence( arg.m_task );
|
||||
assign( task , opts ... );
|
||||
}
|
||||
|
||||
@ -558,8 +564,7 @@ public:
|
||||
// Potentially spawning outside execution space so the
|
||||
// apply function pointer must be obtained from execution space.
|
||||
// Required for Cuda execution space function pointer.
|
||||
queue_type::specialization::template
|
||||
proc_set_apply< FunctorType >( & f.m_task->m_apply );
|
||||
m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply );
|
||||
|
||||
m_queue->schedule( f.m_task );
|
||||
}
|
||||
@ -638,25 +643,13 @@ public:
|
||||
, value_type
|
||||
, FunctorType > ;
|
||||
|
||||
task_base * const zero = (task_base *) 0 ;
|
||||
task_base * const lock = (task_base *) task_base::LockTag ;
|
||||
task_type * const task = static_cast< task_type * >( task_self );
|
||||
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// therefore m_next == LockTag
|
||||
//
|
||||
// Change to m_next == 0 for no dependence
|
||||
|
||||
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
|
||||
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
|
||||
}
|
||||
// Reschedule task with no dependences.
|
||||
m_queue->reschedule( task );
|
||||
|
||||
// Dependences, if requested, are added here through parsing the arguments.
|
||||
assign( task , arg_options... );
|
||||
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// therefore m_next == dependence or 0
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
@ -697,4 +690,3 @@ void wait( TaskScheduler< ExecSpace > const & policy )
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
#include <cstddef>
|
||||
#include <iosfwd>
|
||||
@ -227,7 +227,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
|
||||
#endif /* #define KOKKOS_THREADS_HPP */
|
||||
|
||||
|
||||
|
||||
@ -47,10 +47,10 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#undef KOKKOS_USE_LIBRT
|
||||
#undef KOKKOS_ENABLE_LIBRT
|
||||
#include <gettimeofday.c>
|
||||
#else
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
#include <ctime>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
@ -63,7 +63,7 @@ namespace Kokkos {
|
||||
|
||||
class Timer {
|
||||
private:
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
struct timespec m_old;
|
||||
#else
|
||||
struct timeval m_old ;
|
||||
@ -74,7 +74,7 @@ public:
|
||||
|
||||
inline
|
||||
void reset() {
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
clock_gettime(CLOCK_REALTIME, &m_old);
|
||||
#else
|
||||
gettimeofday( & m_old , ((struct timezone *) NULL ) );
|
||||
@ -90,7 +90,7 @@ public:
|
||||
inline
|
||||
double seconds() const
|
||||
{
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
struct timespec m_new;
|
||||
clock_gettime(CLOCK_REALTIME, &m_new);
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
#ifndef KOKKOS_VECTORIZATION_HPP
|
||||
#define KOKKOS_VECTORIZATION_HPP
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
@ -623,13 +623,13 @@ private:
|
||||
|
||||
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
|
||||
Kokkos::Impl::view_verify_operator_bounds ARG ;
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
#endif
|
||||
@ -647,9 +647,9 @@ public:
|
||||
operator()( Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference();
|
||||
@ -670,9 +670,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0);
|
||||
@ -692,9 +692,9 @@ public:
|
||||
{
|
||||
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 ];
|
||||
@ -713,9 +713,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
|
||||
@ -734,9 +734,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0);
|
||||
@ -753,9 +753,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 ];
|
||||
@ -772,9 +772,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
|
||||
@ -795,9 +795,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1);
|
||||
@ -816,9 +816,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
|
||||
@ -837,9 +837,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
|
||||
@ -858,9 +858,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
|
||||
@ -879,9 +879,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
|
||||
@ -900,9 +900,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
|
||||
@ -924,9 +924,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
|
||||
@ -944,9 +944,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2);
|
||||
@ -967,9 +967,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
|
||||
@ -987,9 +987,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3);
|
||||
@ -1012,9 +1012,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
|
||||
@ -1034,9 +1034,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4);
|
||||
@ -1059,9 +1059,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
|
||||
@ -1081,9 +1081,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5);
|
||||
@ -1106,9 +1106,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
|
||||
@ -1128,9 +1128,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
@ -1153,9 +1153,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
|
||||
@ -1175,15 +1175,15 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
|
||||
}
|
||||
|
||||
#undef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
|
||||
//----------------------------------------
|
||||
// Standard destructor, constructors, and assignment operators
|
||||
@ -1322,7 +1322,7 @@ public:
|
||||
alloc_prop prop( arg_prop );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// If allocating in CudaUVMSpace must fence before and after
|
||||
// the allocation to protect against possible concurrent access
|
||||
// on the CPU and the GPU.
|
||||
@ -1338,7 +1338,7 @@ public:
|
||||
record = m_map.allocate_shared( prop , arg_layout );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
|
||||
traits::device_type::memory_space::execution_space::fence();
|
||||
}
|
||||
|
||||
@ -79,7 +79,7 @@ private:
|
||||
, const Member ibeg , const Member iend )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -96,7 +96,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -218,7 +218,7 @@ private:
|
||||
, reference_type update )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -236,7 +236,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -417,7 +417,7 @@ private:
|
||||
, reference_type update , const bool final )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -435,7 +435,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
#include <impl/Kokkos_CPUDiscovery.hpp>
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
@ -346,10 +346,10 @@ void OpenMP::print_configuration( std::ostream & s , const bool detail )
|
||||
|
||||
s << "Kokkos::OpenMP" ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
s << " KOKKOS_HAVE_OPENMP" ;
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
s << " KOKKOS_ENABLE_OPENMP" ;
|
||||
#endif
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
#if defined( KOKKOS_ENABLE_HWLOC )
|
||||
|
||||
const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count();
|
||||
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
|
||||
@ -405,4 +405,4 @@ int OpenMP::concurrency() {
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif //KOKKOS_HAVE_OPENMP
|
||||
#endif //KOKKOS_ENABLE_OPENMP
|
||||
|
||||
@ -83,7 +83,7 @@ private:
|
||||
// Which thread am I stealing from currently
|
||||
int m_current_steal_target;
|
||||
// This thread's owned work_range
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
|
||||
// Team Offset if one thread determines work_range for others
|
||||
long m_team_work_index;
|
||||
|
||||
@ -404,7 +404,6 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
template< class ValueType, class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION ValueType
|
||||
team_reduce( const ValueType & value
|
||||
@ -417,18 +416,6 @@ public:
|
||||
typedef ValueType value_type;
|
||||
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
|
||||
#endif
|
||||
#else // KOKKOS_HAVE_CXX11
|
||||
template< class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
|
||||
team_reduce( const typename JoinOp::value_type & value
|
||||
, const JoinOp & op ) const
|
||||
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ return typename JoinOp::value_type(); }
|
||||
#else
|
||||
{
|
||||
typedef typename JoinOp::value_type value_type;
|
||||
#endif
|
||||
#endif // KOKKOS_HAVE_CXX11
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
// Make sure there is enough scratch space:
|
||||
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
|
||||
@ -965,7 +952,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -981,7 +968,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1004,7 +991,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::O
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1035,7 +1022,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ope
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_QTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_QTHREAD )
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -507,5 +507,5 @@ QthreadTeamPolicyMember::QthreadTeamPolicyMember( const QthreadTeamPolicyMember:
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */
|
||||
|
||||
|
||||
@ -585,7 +585,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
|
||||
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX11 )
|
||||
|
||||
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||
*
|
||||
@ -610,8 +609,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth
|
||||
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
|
||||
}
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
|
||||
|
||||
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||
*
|
||||
* The range i=0..N-1 is mapped to all vector lanes of the calling thread.
|
||||
@ -620,7 +617,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -636,7 +633,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -659,7 +656,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Q
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -690,7 +687,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qth
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_QTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_QTHREAD )
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
@ -487,5 +487,5 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy )
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD )
|
||||
|
||||
#include <stdint.h>
|
||||
#include <limits>
|
||||
@ -512,10 +512,10 @@ void ThreadsExec::print_configuration( std::ostream & s , const bool detail )
|
||||
|
||||
s << "Kokkos::Threads" ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
s << " KOKKOS_HAVE_PTHREAD" ;
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
s << " KOKKOS_ENABLE_PTHREAD" ;
|
||||
#endif
|
||||
#if defined( KOKKOS_HAVE_HWLOC )
|
||||
#if defined( KOKKOS_ENABLE_HWLOC )
|
||||
s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ;
|
||||
#endif
|
||||
|
||||
@ -822,5 +822,5 @@ int Threads::thread_pool_rank()
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_WINTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD ) */
|
||||
|
||||
|
||||
@ -103,7 +103,7 @@ private:
|
||||
// Which thread am I stealing from currently
|
||||
int m_current_steal_target;
|
||||
// This thread's owned work_range
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN_16;
|
||||
Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16);
|
||||
// Team Offset if one thread determines work_range for others
|
||||
long m_team_work_index;
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
/* Standard 'C' Linux libraries */
|
||||
|
||||
@ -148,11 +148,11 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value )
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
/* end #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
/* end #if defined( KOKKOS_ENABLE_PTHREAD ) */
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_HAVE_WINTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_WINTHREAD )
|
||||
|
||||
/* Windows libraries */
|
||||
#include <winsock2.h>
|
||||
@ -247,7 +247,7 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value ) {}
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* end #elif defined( KOKKOS_HAVE_WINTHREAD ) */
|
||||
#endif /* end #elif defined( KOKKOS_ENABLE_WINTHREAD ) */
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -201,7 +201,6 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
template< class ValueType, class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION ValueType
|
||||
team_reduce( const ValueType & value
|
||||
@ -213,18 +212,6 @@ public:
|
||||
typedef ValueType value_type;
|
||||
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
|
||||
#endif
|
||||
#else // KOKKOS_HAVE_CXX11
|
||||
template< class JoinOp >
|
||||
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
|
||||
team_reduce( const typename JoinOp::value_type & value
|
||||
, const JoinOp & op ) const
|
||||
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
{ return typename JoinOp::value_type(); }
|
||||
#else
|
||||
{
|
||||
typedef typename JoinOp::value_type value_type;
|
||||
#endif
|
||||
#endif // KOKKOS_HAVE_CXX11
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
// Make sure there is enough scratch space:
|
||||
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
|
||||
@ -777,8 +764,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
|
||||
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX11 )
|
||||
|
||||
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||
*
|
||||
* The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
|
||||
@ -802,8 +787,6 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr
|
||||
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
|
||||
}
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
|
||||
|
||||
} //namespace Kokkos
|
||||
|
||||
|
||||
@ -816,7 +799,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -832,7 +815,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -855,7 +838,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::T
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -886,7 +869,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Thr
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -84,7 +84,7 @@ private:
|
||||
, const Member ibeg , const Member iend )
|
||||
{
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -100,7 +100,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -309,7 +309,7 @@ private:
|
||||
, reference_type update )
|
||||
{
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -326,7 +326,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -585,7 +585,7 @@ private:
|
||||
, reference_type update , const bool final )
|
||||
{
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
@ -602,7 +602,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \
|
||||
defined( KOKKOS_HAVE_PRAGMA_IVDEP )
|
||||
defined( KOKKOS_ENABLE_PRAGMA_IVDEP )
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for ( Member i = ibeg ; i < iend ; ++i ) {
|
||||
|
||||
@ -86,7 +86,7 @@ namespace Impl {
|
||||
__attribute__ (( __aligned__( 16 ) ));
|
||||
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
inline cas128_t cas128( volatile cas128_t * ptr, cas128_t cmp, cas128_t swap )
|
||||
{
|
||||
bool swapped = false;
|
||||
|
||||
@ -50,9 +50,9 @@ namespace Kokkos {
|
||||
// Cuda native CAS supports int, unsigned int, and unsigned long long int (non-standard type).
|
||||
// Must cast-away 'volatile' for the CAS call.
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
__inline__ __device__
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
{ return atomicCAS((int*)dest,compare,val); }
|
||||
@ -120,8 +120,8 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
//----------------------------------------------------------------------------
|
||||
// GCC native CAS supports int, long, unsigned int, unsigned long.
|
||||
// Intel native CAS supports int and long with the same interface as GCC.
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_compare_exchange( volatile int * const dest, const int compare, const int val)
|
||||
@ -131,7 +131,7 @@ inline
|
||||
long atomic_compare_exchange( volatile long * const dest, const long compare, const long val )
|
||||
{ return __sync_val_compare_and_swap(dest,compare,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
// GCC supports unsigned
|
||||
|
||||
@ -152,18 +152,11 @@ inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) );
|
||||
return tmp.t ;
|
||||
@ -175,24 +168,17 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T & >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} tmp ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} tmp ;
|
||||
#endif
|
||||
|
||||
tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) );
|
||||
return tmp.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_compare_exchange( volatile T * const dest, const T & compare,
|
||||
@ -217,7 +203,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -245,7 +231,7 @@ T atomic_compare_exchange( volatile T * const dest , const T compare ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT )
|
||||
#define KOKKOS_ATOMIC_DECREMENT
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT_HPP )
|
||||
#define KOKKOS_ATOMIC_DECREMENT_HPP
|
||||
|
||||
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
|
||||
|
||||
@ -52,7 +52,7 @@ namespace Kokkos {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decb %0"
|
||||
: /* no output registers */
|
||||
@ -67,7 +67,7 @@ void atomic_decrement<char>(volatile char* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decw %0"
|
||||
: /* no output registers */
|
||||
@ -82,7 +82,7 @@ void atomic_decrement<short>(volatile short* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decl %0"
|
||||
: /* no output registers */
|
||||
@ -97,7 +97,7 @@ void atomic_decrement<int>(volatile int* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_decrement<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock decq %0"
|
||||
: /* no output registers */
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
__inline__ __device__
|
||||
int atomic_exchange( volatile int * const dest , const int val )
|
||||
@ -162,8 +162,8 @@ void atomic_assign(
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
template< typename T >
|
||||
inline
|
||||
@ -177,15 +177,11 @@ T atomic_exchange( volatile T * const dest ,
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
@ -197,7 +193,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
return old.val_T ;
|
||||
}
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
inline
|
||||
T atomic_exchange( volatile T * const dest ,
|
||||
@ -230,7 +226,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -267,15 +263,11 @@ void atomic_assign( volatile T * const dest ,
|
||||
|
||||
type assumed ;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
T val_T ;
|
||||
type val_type ;
|
||||
inline U() {};
|
||||
} old ;
|
||||
#else
|
||||
union { T val_T ; type val_type ; } old ;
|
||||
#endif
|
||||
|
||||
old.val_T = *dest ;
|
||||
|
||||
@ -285,7 +277,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
} while ( assumed != old.val_type );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template< typename T >
|
||||
inline
|
||||
void atomic_assign( volatile T * const dest ,
|
||||
@ -313,7 +305,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -331,7 +323,7 @@ void atomic_assign( volatile T * const dest ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template < typename T >
|
||||
inline
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -81,18 +81,11 @@ __inline__ __device__
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -111,18 +104,11 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(unsigned long long int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
KOKKOS_INLINE_FUNCTION U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
unsigned long long int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -167,10 +153,10 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
inline
|
||||
int atomic_fetch_add( volatile int * dest , const int val )
|
||||
{
|
||||
@ -195,7 +181,7 @@ inline
|
||||
long int atomic_fetch_add( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_add(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -212,18 +198,11 @@ inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
int i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -242,18 +221,11 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
|
||||
sizeof(T) == sizeof(long) , const T >::type val )
|
||||
{
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
inline U() {};
|
||||
} assume , oldval , newval ;
|
||||
#else
|
||||
union U {
|
||||
long i ;
|
||||
T t ;
|
||||
} assume , oldval , newval ;
|
||||
#endif
|
||||
|
||||
oldval.t = *dest ;
|
||||
|
||||
@ -266,7 +238,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
return oldval.t ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
template < typename T >
|
||||
inline
|
||||
T atomic_fetch_add( volatile T * const dest ,
|
||||
@ -300,7 +272,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
typename Kokkos::Impl::enable_if<
|
||||
( sizeof(T) != 4 )
|
||||
&& ( sizeof(T) != 8 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
&& ( sizeof(T) != 16 )
|
||||
#endif
|
||||
, const T >::type& val )
|
||||
@ -324,7 +296,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
}
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_add( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_and( volatile int * const dest , const int val )
|
||||
@ -81,7 +81,7 @@ inline
|
||||
long int atomic_fetch_and( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_and(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , co
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_and( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -70,8 +70,8 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_or( volatile int * const dest , const int val )
|
||||
@ -81,7 +81,7 @@ inline
|
||||
long int atomic_fetch_or( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_or(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -95,7 +95,7 @@ unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , con
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_or( volatile T * const dest , const T val )
|
||||
|
||||
@ -48,8 +48,8 @@ namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
|
||||
// Support for int, unsigned int, unsigned long long int, and float
|
||||
|
||||
@ -130,8 +130,8 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
#endif
|
||||
#endif
|
||||
//----------------------------------------------------------------------------
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ATOMICS_USE_GCC) || defined(KOKKOS_ATOMICS_USE_INTEL)
|
||||
#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND)
|
||||
#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS)
|
||||
|
||||
inline
|
||||
int atomic_fetch_sub( volatile int * const dest , const int val )
|
||||
@ -141,7 +141,7 @@ inline
|
||||
long int atomic_fetch_sub( volatile long int * const dest , const long int val )
|
||||
{ return __sync_fetch_and_sub(dest,val); }
|
||||
|
||||
#if defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
#if defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
|
||||
inline
|
||||
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
|
||||
@ -210,7 +210,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
template< typename T >
|
||||
T atomic_fetch_sub( volatile T * const dest , const T val )
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT )
|
||||
#define KOKKOS_ATOMIC_INCREMENT
|
||||
#if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT_HPP )
|
||||
#define KOKKOS_ATOMIC_INCREMENT_HPP
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -50,7 +50,7 @@ namespace Kokkos {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<char>(volatile char* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incb %0"
|
||||
: /* no output registers */
|
||||
@ -65,7 +65,7 @@ void atomic_increment<char>(volatile char* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<short>(volatile short* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incw %0"
|
||||
: /* no output registers */
|
||||
@ -80,7 +80,7 @@ void atomic_increment<short>(volatile short* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<int>(volatile int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incl %0"
|
||||
: /* no output registers */
|
||||
@ -95,7 +95,7 @@ void atomic_increment<int>(volatile int* a) {
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void atomic_increment<long long int>(volatile long long int* a) {
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__)
|
||||
__asm__ __volatile__(
|
||||
"lock incq %0"
|
||||
: /* no output registers */
|
||||
|
||||
@ -70,20 +70,20 @@ void initialize_internal(const InitArguments& args)
|
||||
// This is an experimental setting
|
||||
// For KNL in Flat mode this variable should be set, so that
|
||||
// memkind allocates high bandwidth memory correctly.
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
#endif
|
||||
|
||||
// Protect declarations, to prevent "unused variable" warnings.
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
|
||||
const int num_threads = args.num_threads;
|
||||
const int use_numa = args.num_numa;
|
||||
#endif // defined( KOKKOS_HAVE_OPENMP ) || defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
const int use_gpu = args.device_id;
|
||||
#endif // defined( KOKKOS_HAVE_CUDA )
|
||||
#endif // defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
@ -103,7 +103,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
if(num_threads>0) {
|
||||
@ -123,7 +123,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
// Prevent "unused variable" warning for 'args' input struct. If
|
||||
// Serial::initialize() ever needs to take arguments from the input
|
||||
// struct, you may remove this line of code.
|
||||
@ -135,7 +135,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) {
|
||||
if (use_gpu > -1) {
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( use_gpu ) );
|
||||
@ -159,14 +159,14 @@ void finalize_internal( const bool all_spaces = false )
|
||||
Kokkos::Profiling::finalize();
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || all_spaces ) {
|
||||
if(Kokkos::Cuda::is_initialized())
|
||||
Kokkos::Cuda::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -175,7 +175,7 @@ void finalize_internal( const bool all_spaces = false )
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -184,7 +184,7 @@ void finalize_internal( const bool all_spaces = false )
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
@ -197,27 +197,27 @@ void finalize_internal( const bool all_spaces = false )
|
||||
void fence_internal()
|
||||
{
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) {
|
||||
Kokkos::Cuda::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::OpenMP::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Threads::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Serial::fence();
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <string>
|
||||
#include <iosfwd>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
#include <Kokkos_HBWSpace.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#include <memkind.h>
|
||||
#endif
|
||||
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -48,17 +48,17 @@
|
||||
#endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
// Intel specialized allocator does not interoperate with CUDA memory allocation
|
||||
|
||||
#define KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#define KOKKOS_ENABLE_INTEL_MM_ALLOC
|
||||
|
||||
#endif
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
|
||||
#if defined(KOKKOS_ENABLE_POSIX_MEMALIGN)
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
@ -66,18 +66,18 @@
|
||||
/* mmap flags for private anonymous memory allocation */
|
||||
|
||||
#if defined( MAP_ANONYMOUS ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#elif defined( MAP_ANON ) && defined( MAP_PRIVATE )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS (MAP_PRIVATE | MAP_ANON)
|
||||
#endif
|
||||
|
||||
// mmap flags for huge page tables
|
||||
// the Cuda driver does not interoperate with MAP_HUGETLB
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_HAVE_CUDA )
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE (KOKKOS_POSIX_MMAP_FLAGS | MAP_HUGETLB )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
#if defined( MAP_HUGETLB ) && ! defined( KOKKOS_ENABLE_CUDA )
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE (KOKKOS_IMPL_POSIX_MMAP_FLAGS | MAP_HUGETLB )
|
||||
#else
|
||||
#define KOKKOS_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -162,11 +162,11 @@ namespace Kokkos {
|
||||
/* Default allocation mechanism */
|
||||
HostSpace::HostSpace()
|
||||
: m_alloc_mech(
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
HostSpace::INTEL_MM_ALLOC
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
HostSpace::POSIX_MMAP
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
HostSpace::POSIX_MEMALIGN
|
||||
#else
|
||||
HostSpace::STD_MALLOC
|
||||
@ -181,15 +181,15 @@ HostSpace::HostSpace( const HostSpace::AllocationMechanism & arg_alloc_mech )
|
||||
if ( arg_alloc_mech == STD_MALLOC ) {
|
||||
m_alloc_mech = HostSpace::STD_MALLOC ;
|
||||
}
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( arg_alloc_mech == HostSpace::INTEL_MM_ALLOC ) {
|
||||
m_alloc_mech = HostSpace::INTEL_MM_ALLOC ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#elif defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MEMALIGN ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MEMALIGN ;
|
||||
}
|
||||
#elif defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#elif defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( arg_alloc_mech == HostSpace::POSIX_MMAP ) {
|
||||
m_alloc_mech = HostSpace::POSIX_MMAP ;
|
||||
}
|
||||
@ -244,25 +244,25 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
ptr = _mm_malloc( arg_alloc_size , alignment );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
posix_memalign( & ptr, alignment , arg_alloc_size );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
constexpr size_t use_huge_pages = (1u << 27);
|
||||
constexpr int prot = PROT_READ | PROT_WRITE ;
|
||||
const int flags = arg_alloc_size < use_huge_pages
|
||||
? KOKKOS_POSIX_MMAP_FLAGS
|
||||
: KOKKOS_POSIX_MMAP_FLAGS_HUGE ;
|
||||
? KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
: KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE ;
|
||||
|
||||
// read write access to private memory
|
||||
|
||||
@ -314,19 +314,19 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_
|
||||
free( alloc_ptr );
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_INTEL_MM_ALLOC_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC )
|
||||
else if ( m_alloc_mech == INTEL_MM_ALLOC ) {
|
||||
_mm_free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MEMALIGN_AVAILABLE )
|
||||
#if defined( KOKKOS_ENABLE_POSIX_MEMALIGN )
|
||||
else if ( m_alloc_mech == POSIX_MEMALIGN ) {
|
||||
free( arg_alloc_ptr );
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_POSIX_MMAP_FLAGS )
|
||||
#if defined( KOKKOS_IMPL_POSIX_MMAP_FLAGS )
|
||||
else if ( m_alloc_mech == POSIX_MMAP ) {
|
||||
munmap( arg_alloc_ptr , arg_alloc_size );
|
||||
}
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE )
|
||||
#define KOKKOS_MEMORY_FENCE
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
|
||||
#define KOKKOS_MEMORY_FENCE_HPP
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -52,14 +52,14 @@ void memory_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ATOMICS_USE_INTEL ) )
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \
|
||||
( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) )
|
||||
__sync_synchronize();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
_mm_mfence();
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
#pragma omp flush
|
||||
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
MemoryBarrier();
|
||||
#else
|
||||
#error "Error: memory_fence() not defined"
|
||||
@ -74,7 +74,7 @@ void memory_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void store_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"sfence" ::: "memory"
|
||||
);
|
||||
@ -91,7 +91,7 @@ void store_fence()
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void load_fence()
|
||||
{
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_USE_ISA_X86_64 )
|
||||
#if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 )
|
||||
asm volatile (
|
||||
"lfence" ::: "memory"
|
||||
);
|
||||
|
||||
447
lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
Normal file
447
lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp
Normal file
@ -0,0 +1,447 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
#define KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
#define KOKKOS_ENABLE_CUDA_ATOMICS KOKKOS_ATOMICS_USE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_GCC
|
||||
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GCC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_GNU
|
||||
#ifndef KOKKOS_ENABLE_GNU_ATOMICS
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS KOKKOS_ATOMICS_USE_GNU
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_INTEL
|
||||
#ifndef KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
#define KOKKOS_ENABLE_INTEL_ATOMICS KOKKOS_ATOMICS_USE_INTEL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_OMP31
|
||||
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OMP31
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_OPENMP31
|
||||
#ifndef KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS KOKKOS_ATOMICS_USE_OPENMP31
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#ifndef KOKKOS_ENABLE_WINDOWS_ATOMICS
|
||||
#define KOKKOS_ENABLE_WINDOWS_ATOMICS KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifndef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_CUDA_USE_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
|
||||
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC KOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifndef KOKKOS_ENABLE_CUDA_UVM
|
||||
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_CUDA_USE_UVM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA
|
||||
#define KOKKOS_ENABLE_CUDA KOKKOS_HAVE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#define KOKKOS_ENABLE_CUDA_LAMBDA KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA_RDC
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RDC
|
||||
#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUSPARSE
|
||||
#ifndef KOKKOS_ENABLE_CUSPARSE
|
||||
#define KOKKOS_ENABLE_CUSPARSE KOKKOS_HAVE_CUSPARSE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
|
||||
#ifndef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX1Z
|
||||
#ifndef KOKKOS_ENABLE_CXX1Z
|
||||
#define KOKKOS_ENABLE_CXX1Z KOKKOS_HAVE_CXX1Z
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifndef KOKKOS_DEBUG
|
||||
#define KOKKOS_DEBUG KOKKOS_HAVE_DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#ifndef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifndef KOKKOS_ENABLE_HBWSPACE
|
||||
#define KOKKOS_ENABLE_HBWSPACE KOKKOS_HAVE_HBWSPACE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_HWLOC
|
||||
#ifndef KOKKOS_ENABLE_HWLOC
|
||||
#define KOKKOS_ENABLE_HWLOC KOKKOS_HAVE_HWLOC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_MPI
|
||||
#ifndef KOKKOS_ENABLE_MPI
|
||||
#define KOKKOS_ENABLE_MPI KOKKOS_HAVE_MPI
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifndef KOKKOS_ENABLE_OPENMP
|
||||
#define KOKKOS_ENABLE_OPENMP KOKKOS_HAVE_OPENMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT KOKKOS_HAVE_PRAGMA_LOOPCOUNT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_SIMD
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_SIMD
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD KOKKOS_HAVE_PRAGMA_SIMD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_UNROLL
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_UNROLL
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL KOKKOS_HAVE_PRAGMA_UNROLL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_VECTOR
|
||||
#ifndef KOKKOS_ENABLE_PRAGMA_VECTOR
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR KOKKOS_HAVE_PRAGMA_VECTOR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PTHREAD
|
||||
#ifndef KOKKOS_ENABLE_PTHREAD
|
||||
#define KOKKOS_ENABLE_PTHREAD KOKKOS_HAVE_PTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_QTHREAD
|
||||
#ifndef KOKKOS_ENABLE_QTHREAD
|
||||
#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_SERIAL
|
||||
#ifndef KOKKOS_ENABLE_SERIAL
|
||||
#define KOKKOS_ENABLE_SERIAL KOKKOS_HAVE_SERIAL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_TYPE
|
||||
#ifndef KOKKOS_IMPL_HAS_TYPE
|
||||
#define KOKKOS_IMPL_HAS_TYPE KOKKOS_HAVE_TYPE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_WINTHREAD
|
||||
#ifndef KOKKOS_ENABLE_WINTHREAD
|
||||
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_WINTHREAD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_Winthread
|
||||
#ifndef KOKKOS_ENABLE_WINTHREAD
|
||||
#define KOKKOS_ENABLE_WINTHREAD KOKKOS_HAVE_Winthread
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#ifndef KOKKOS_ENABLE_INTEL_MM_ALLOC
|
||||
#define KOKKOS_ENABLE_INTEL_MM_ALLOC KOKKOS_INTEL_MM_ALLOC_AVAILABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MACRO_IMPL_TO_STRING
|
||||
#ifndef KOKKOS_IMPL_MACRO_TO_STRING
|
||||
#define KOKKOS_IMPL_MACRO_TO_STRING KOKKOS_MACRO_IMPL_TO_STRING
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MACRO_TO_STRING
|
||||
#ifndef KOKKOS_MACRO_TO_STRING
|
||||
#define KOKKOS_MACRO_TO_STRING KOKKOS_MACRO_TO_STRING
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MAY_ALIAS
|
||||
#ifndef KOKKOS_IMPL_MAY_ALIAS
|
||||
#define KOKKOS_IMPL_MAY_ALIAS KOKKOS_MAY_ALIAS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MDRANGE_IVDEP
|
||||
#ifndef KOKKOS_IMPL_MDRANGE_IVDEP
|
||||
#define KOKKOS_IMPL_MDRANGE_IVDEP KOKKOS_MDRANGE_IVDEP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINTERR KOKKOS_MEMPOOL_PRINTERR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifndef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MEMALIGN_AVAILABLE
|
||||
#ifndef KOKKOS_ENABLE_POSIX_MEMALIGN
|
||||
#define KOKKOS_ENABLE_POSIX_MEMALIGN KOKKOS_POSIX_MEMALIGN_AVAILABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MMAP_FLAGS
|
||||
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS KOKKOS_POSIX_MMAP_FLAGS
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_POSIX_MMAP_FLAGS_HUGE
|
||||
#ifndef KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE
|
||||
#define KOKKOS_IMPL_POSIX_MMAP_FLAGS_HUGE KOKKOS_POSIX_MMAP_FLAGS_HUGE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#ifndef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_CUDA_UVM
|
||||
#ifndef KOKKOS_ENABLE_CUDA_UVM
|
||||
#define KOKKOS_ENABLE_CUDA_UVM KOKKOS_USE_CUDA_UVM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_KNC
|
||||
#ifndef KOKKOS_ENABLE_ISA_KNC
|
||||
#define KOKKOS_ENABLE_ISA_KNC KOKKOS_USE_ISA_KNC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_POWERPCLE
|
||||
#ifndef KOKKOS_ENABLE_ISA_POWERPCLE
|
||||
#define KOKKOS_ENABLE_ISA_POWERPCLE KOKKOS_USE_ISA_POWERPCLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_ISA_X86_64
|
||||
#ifndef KOKKOS_ENABLE_ISA_X86_64
|
||||
#define KOKKOS_ENABLE_ISA_X86_64 KOKKOS_USE_ISA_X86_64
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifndef KOKKOS_ENABLE_LIBRT
|
||||
#define KOKKOS_ENABLE_LIBRT KOKKOS_USE_LIBRT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#ifndef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Deprecated macros
|
||||
//------------------------------------------------------------------------------
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
#undef KOKKOS_HAVE_CXX11
|
||||
#endif
|
||||
#ifdef KOKKOS_ENABLE_CXX11
|
||||
#undef KOKKOS_ENABLE_CXX11
|
||||
#endif
|
||||
#ifdef KOKKOS_USING_EXP_VIEW
|
||||
#undef KOKKOS_USING_EXP_VIEW
|
||||
#endif
|
||||
#ifdef KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
#undef KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
#endif
|
||||
|
||||
#define KOKKOS_HAVE_CXX11 1
|
||||
#define KOKKOS_ENABLE_CXX11 1
|
||||
#define KOKKOS_USING_EXP_VIEW 1
|
||||
#define KOKKOS_USING_EXPERIMENTAL_VIEW 1
|
||||
|
||||
#endif //KOKKOS_IMPL_OLD_MACROS_HPP
|
||||
@ -47,7 +47,7 @@
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -114,6 +114,6 @@ void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_siz
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_Serial_Task.hpp>
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
@ -144,5 +144,5 @@ void TaskQueueSpecialization< Kokkos::Serial > ::
|
||||
|
||||
}} /* namespace Kokkos::Impl */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
@ -240,7 +240,7 @@ void parallel_reduce
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
initialized_result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -259,7 +259,7 @@ void parallel_reduce
|
||||
ValueType& initialized_result)
|
||||
{
|
||||
ValueType result = initialized_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
|
||||
@ -260,22 +260,22 @@ public:
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED \
|
||||
Record::tracking_enabled()
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::increment( m_record );
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT \
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
|
||||
if ( ! ( m_record_bits & DO_NOT_DEREF_FLAG ) ) Record::decrement( m_record );
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED 0
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED 0
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT /* */
|
||||
|
||||
#define KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
|
||||
#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT /* */
|
||||
|
||||
#endif
|
||||
|
||||
@ -319,7 +319,7 @@ public:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
~SharedAllocationTracker()
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
constexpr SharedAllocationTracker()
|
||||
@ -336,7 +336,7 @@ public:
|
||||
SharedAllocationTracker & operator = ( SharedAllocationTracker && rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
// Move and reset RHS to default constructed value.
|
||||
m_record_bits = rhs.m_record_bits ;
|
||||
rhs.m_record_bits = DO_NOT_DEREF_FLAG ;
|
||||
@ -347,32 +347,32 @@ public:
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
/** \brief Copy construction may disable tracking. */
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
: m_record_bits( KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
: m_record_bits( KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG )
|
||||
{ KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT }
|
||||
{ KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
SharedAllocationTracker & operator = ( const SharedAllocationTracker & rhs )
|
||||
{
|
||||
// If this is tracking then must decrement
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
return *this ;
|
||||
}
|
||||
|
||||
@ -381,17 +381,17 @@ public:
|
||||
void assign( const SharedAllocationTracker & rhs
|
||||
, const bool enable_tracking )
|
||||
{
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
&& enable_tracking
|
||||
? rhs.m_record_bits
|
||||
: rhs.m_record_bits | DO_NOT_DEREF_FLAG ;
|
||||
KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
}
|
||||
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#undef KOKKOS_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_ENABLED
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
|
||||
#undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -51,17 +51,17 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** KOKKOS_HAVE_TYPE( Type )
|
||||
/** KOKKOS_IMPL_HAS_TYPE( Type )
|
||||
*
|
||||
* defines a meta-function that check if a type expose an internal typedef or
|
||||
* type alias which matches Type
|
||||
*
|
||||
* e.g.
|
||||
* KOKKOS_HAVE_TYPE( array_layout );
|
||||
* KOKKOS_IMPL_HAS_TYPE( array_layout );
|
||||
* struct Foo { using array_layout = void; };
|
||||
* have_array_layout<Foo>::value == 1;
|
||||
*/
|
||||
#define KOKKOS_HAVE_TYPE( TYPE ) \
|
||||
#define KOKKOS_IMPL_HAS_TYPE( TYPE ) \
|
||||
template <typename T> struct have_ ## TYPE { \
|
||||
private: \
|
||||
template <typename U, typename = void > struct X : std::false_type {}; \
|
||||
|
||||
@ -152,6 +152,16 @@ private:
|
||||
KOKKOS_FUNCTION
|
||||
void schedule( task_root_type * const );
|
||||
|
||||
// Reschedule a task
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// task->m_next == LockTag
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// task->m_next == 0 (no dependence)
|
||||
KOKKOS_FUNCTION
|
||||
void reschedule( task_root_type * );
|
||||
|
||||
// Complete a task
|
||||
// Precondition:
|
||||
// task is not executing
|
||||
@ -187,6 +197,12 @@ public:
|
||||
|
||||
void execute() { specialization::execute( this ); }
|
||||
|
||||
template< typename FunctorType >
|
||||
void proc_set_apply( typename task_root_type::function_type * ptr )
|
||||
{
|
||||
specialization::template proc_set_apply< FunctorType >( ptr );
|
||||
}
|
||||
|
||||
// Assign task pointer with reference counting of assigned tasks
|
||||
template< typename LV , typename RV >
|
||||
KOKKOS_FUNCTION static
|
||||
@ -378,6 +394,31 @@ public:
|
||||
TaskBase ** aggregate_dependences()
|
||||
{ return reinterpret_cast<TaskBase**>( this + 1 ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool requested_respawn()
|
||||
{
|
||||
// This should only be called when a task has finished executing and is
|
||||
// in the transition to either the complete or executing-respawn state.
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag );
|
||||
return lock != m_next;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_dependence( TaskBase* dep )
|
||||
{
|
||||
// Assign dependence to m_next. It will be processed in the subsequent
|
||||
// call to schedule. Error if the dependence is reset.
|
||||
if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != dep ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(dep->m_ref_count) );
|
||||
}
|
||||
}
|
||||
|
||||
using get_return_type = void ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -415,7 +456,6 @@ public:
|
||||
get_return_type get() const { return m_result ; }
|
||||
};
|
||||
|
||||
|
||||
template< typename ExecSpace , typename ResultType , typename FunctorType >
|
||||
class TaskBase
|
||||
: public TaskBase< ExecSpace , ResultType , void >
|
||||
@ -468,30 +508,28 @@ public:
|
||||
KOKKOS_FUNCTION static
|
||||
void apply( root_type * root , void * exec )
|
||||
{
|
||||
TaskBase * const lock = reinterpret_cast< TaskBase * >( root_type::LockTag );
|
||||
TaskBase * const task = static_cast< TaskBase * >( root );
|
||||
member_type * const member = reinterpret_cast< member_type * >( exec );
|
||||
|
||||
TaskBase::template apply_functor( task , member );
|
||||
|
||||
// Task may be serial or team.
|
||||
// If team then must synchronize before querying task->m_next.
|
||||
// If team then must synchronize before querying if respawn was requested.
|
||||
// If team then only one thread calls destructor.
|
||||
|
||||
member->team_barrier();
|
||||
|
||||
if ( 0 == member->team_rank() && lock == task->m_next ) {
|
||||
// Did not respawn, destroy the functor to free memory
|
||||
if ( 0 == member->team_rank() && !(task->requested_respawn()) ) {
|
||||
// Did not respawn, destroy the functor to free memory.
|
||||
static_cast<functor_type*>(task)->~functor_type();
|
||||
// Cannot destroy the task until its dependences
|
||||
// have been processed.
|
||||
// Cannot destroy the task until its dependences have been processed.
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
TaskBase( FunctorType const & arg_functor )
|
||||
TaskBase( functor_type const & arg_functor )
|
||||
: base_type()
|
||||
, FunctorType( arg_functor )
|
||||
, functor_type( arg_functor )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -506,4 +544,3 @@ public:
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */
|
||||
|
||||
|
||||
@ -476,6 +476,28 @@ void TaskQueue< ExecSpace >::schedule
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::reschedule( task_root_type * task )
|
||||
{
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// task->m_next == LockTag
|
||||
//
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// task->m_next == 0 (no dependence)
|
||||
|
||||
task_root_type * const zero = (task_root_type *) 0 ;
|
||||
task_root_type * const lock = (task_root_type *) task_root_type::LockTag ;
|
||||
|
||||
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
|
||||
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ExecSpace >
|
||||
KOKKOS_FUNCTION
|
||||
void TaskQueue< ExecSpace >::complete
|
||||
@ -565,6 +587,4 @@ void TaskQueue< ExecSpace >::complete
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user