Update to Kokkos library v2.5.00

This commit is contained in:
Stan Moore
2017-12-15 16:42:06 -07:00
parent da83feb8ca
commit a2756db66b
292 changed files with 8238 additions and 2823 deletions

View File

@ -1,4 +1,44 @@
# Change Log
## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00)
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5**
**Implemented enhancements:**
- Provide Makefile.kokkos logic for CMake and TriBITS [\#878](https://github.com/kokkos/kokkos/issues/878)
- Add Scatter View [\#825](https://github.com/kokkos/kokkos/issues/825)
- Drop gcc 4.7 and intel 14 from supported compiler list [\#603](https://github.com/kokkos/kokkos/issues/603)
- Enable construction of unmanaged view using common\_view\_alloc\_prop [\#1170](https://github.com/kokkos/kokkos/issues/1170)
- Unused Function Warning with XL [\#1267](https://github.com/kokkos/kokkos/issues/1267)
- Add memory pool parameter check [\#1218](https://github.com/kokkos/kokkos/issues/1218)
- CUDA9: Fix warning for unsupported long double [\#1189](https://github.com/kokkos/kokkos/issues/1189)
- CUDA9: fix warning on defaulted function marking [\#1188](https://github.com/kokkos/kokkos/issues/1188)
- CUDA9: fix warnings for deprecated warp level functions [\#1187](https://github.com/kokkos/kokkos/issues/1187)
- Add CUDA 9.0 nightly testing [\#1174](https://github.com/kokkos/kokkos/issues/1174)
- {OMPI,MPICH}\_CXX hack breaks nvcc\_wrapper use case [\#1166](https://github.com/kokkos/kokkos/issues/1166)
- KOKKOS\_HAVE\_CUDA\_LAMBDA became KOKKOS\_CUDA\_USE\_LAMBDA [\#1274](https://github.com/kokkos/kokkos/issues/1274)
**Fixed bugs:**
- MinMax Reducer with tagged operator doesn't compile [\#1251](https://github.com/kokkos/kokkos/issues/1251)
- Reducers for Tagged operators give wrong answer [\#1250](https://github.com/kokkos/kokkos/issues/1250)
- Kokkos not Compatible with Big Endian Machines? [\#1235](https://github.com/kokkos/kokkos/issues/1235)
- Parallel Scan hangs forever on BG/Q [\#1234](https://github.com/kokkos/kokkos/issues/1234)
- Threads backend doesn't compile with Clang on OS X [\#1232](https://github.com/kokkos/kokkos/issues/1232)
- $\(shell date\) needs quote [\#1264](https://github.com/kokkos/kokkos/issues/1264)
- Unqualified parallel\_for call conflicts with user-defined parallel\_for [\#1219](https://github.com/kokkos/kokkos/issues/1219)
- KokkosAlgorithms: CMake issue in unit tests [\#1212](https://github.com/kokkos/kokkos/issues/1212)
- Intel 18 Error: "simd pragma has been deprecated" [\#1210](https://github.com/kokkos/kokkos/issues/1210)
- Memory leak in Kokkos::initialize [\#1194](https://github.com/kokkos/kokkos/issues/1194)
- CUDA9: compiler error with static assert template arguments [\#1190](https://github.com/kokkos/kokkos/issues/1190)
- Kokkos::Serial::is\_initialized returns always true [\#1184](https://github.com/kokkos/kokkos/issues/1184)
- Triple nested parallelism still fails on bowman [\#1093](https://github.com/kokkos/kokkos/issues/1093)
- OpenMP openmp.range on Develop Runs Forever on POWER7+ with RHEL7 and GCC4.8.5 [\#995](https://github.com/kokkos/kokkos/issues/995)
- Rendezvous performance at global scope [\#985](https://github.com/kokkos/kokkos/issues/985)
## [2.04.11](https://github.com/kokkos/kokkos/tree/2.04.11) (2017-10-28)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.04...2.04.11)

View File

@ -1,3 +1,5 @@
# Is this a build as part of Trilinos?
IF(COMMAND TRIBITS_PACKAGE_DECL)
SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "")
ELSE()
@ -6,13 +8,57 @@ ENDIF()
IF(NOT KOKKOS_HAS_TRILINOS)
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
project(Kokkos CXX)
INCLUDE(cmake/kokkos.cmake)
# Define Project Name if this is a standalone build
# NOTE(review): ${PROJECT_NAME} is expanded before DEFINED is evaluated, so this
# tests whether a variable named after the *value* of PROJECT_NAME exists, not
# whether PROJECT_NAME itself is set -- confirm this is intended before changing
# it: project(Kokkos ...) is what defines Kokkos_SOURCE_DIR, which the settings
# below read.
IF(NOT DEFINED ${PROJECT_NAME})
project(Kokkos CXX)
ENDIF()
# Basic initialization (Used in KOKKOS_SETTINGS)
set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR})
set(KOKKOS_PATH ${KOKKOS_SRC_PATH})
#------------ COMPILER AND FEATURE CHECKS ------------------------------------
include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
set_kokkos_cxx_compiler()
set_kokkos_cxx_standard()
#------------ GET OPTIONS AND KOKKOS_SETTINGS --------------------------------
# Add Kokkos' modules to CMake's module path.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
set(KOKKOS_CMAKE_VERBOSE True)
include(${KOKKOS_SRC_PATH}/cmake/kokkos_options.cmake)
include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake)
#------------ GENERATE HEADER AND SOURCE FILES -------------------------------
# Run the Makefile-based settings generator at configure time. It is expected
# to produce kokkos_generated_settings.cmake in the build directory, which is
# included right after this step. Standard output goes to core_src_make.out.
execute_process(
  COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings
  WORKING_DIRECTORY "${Kokkos_BINARY_DIR}"
  OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out
  RESULT_VARIABLE res
)
# Fail loudly if the generator did not exit cleanly. Without this check a
# failed run only surfaces later as a missing (or stale) generated file.
# res holds the exit code, or an error string if the process could not start.
if(NOT "${res}" STREQUAL "0")
  message(FATAL_ERROR
    "Kokkos settings generation failed (result: ${res}).\n"
    "Command: ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings\n"
    "See ${Kokkos_BINARY_DIR}/core_src_make.out for the generator output.")
endif()
include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake)
set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})
#------------ NOW BUILD ------------------------------------------------------
include(${KOKKOS_SRC_PATH}/cmake/kokkos_build.cmake)
#------------ Add in Fake Tribits Handling to allow unit test builds ---------
include(${KOKKOS_SRC_PATH}/cmake/tribits.cmake)
TRIBITS_PACKAGE_DECL(Kokkos)
ADD_SUBDIRECTORY(core)
ADD_SUBDIRECTORY(containers)
ADD_SUBDIRECTORY(algorithms)
ELSE()
#------------------------------------------------------------------------------
#
# A) Forward delcare the package so that certain options are also defined for
# A) Forward declare the package so that certain options are also defined for
# subpackages
#
@ -21,212 +67,28 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
#------------------------------------------------------------------------------
#
# B) Define the common options for Kokkos first so they can be used by
# subpackages as well.
# B) Install Kokkos' build files
#
# If using the Makefile-generated files, then need to set things up.
# Here, assume that TriBITS has been run from ProjectCompilerPostConfig.cmake
# and already generated KokkosCore_config.h and kokkos_generated_settings.cmake
# in the previously defined Kokkos_GEN_DIR
# We need to copy them over to the correct place and source the cmake file
# mfh 01 Aug 2016: See Issue #61:
#
# https://github.com/kokkos/kokkos/issues/61
#
# Don't use TRIBITS_ADD_DEBUG_OPTION() here, because that defines
# HAVE_KOKKOS_DEBUG. We define KOKKOS_HAVE_DEBUG here instead,
# for compatibility with Kokkos' Makefile build system.
if(NOT KOKKOS_LEGACY_TRIBITS)
set(Kokkos_GEN_DIR ${CMAKE_BINARY_DIR})
file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h"
DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS)
install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h"
DESTINATION include)
file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake"
DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS)
if (TPL_ENABLE_CUDA)
if (DEFINED CUDA_VERSION)
# there is a VERSION_GREATER_EQUAL, but only in CMake >= 3.7
if (CUDA_VERSION VERSION_EQUAL "7.5")
set(KOKKOS_HAVE_CUDA_GEQ_75 TRUE)
endif()
if (CUDA_VERSION VERSION_GREATER "7.5")
set(KOKKOS_HAVE_CUDA_GEQ_75 TRUE)
endif()
if (CUDA_VERSION VERSION_EQUAL "8.0")
set(KOKKOS_HAVE_CUDA_GEQ_80 TRUE)
endif()
if (CUDA_VERSION VERSION_GREATER "8.0")
set(KOKKOS_HAVE_CUDA_GEQ_80 TRUE)
endif()
endif()
endif()
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_DEBUG
KOKKOS_HAVE_DEBUG
"Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build."
${${PROJECT_NAME}_ENABLE_DEBUG}
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_SIERRA_BUILD
KOKKOS_FOR_SIERRA
"Configure Kokkos for building within the Sierra build system."
OFF
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Cuda
KOKKOS_HAVE_CUDA
"Enable CUDA support in Kokkos."
"${KOKKOS_HAVE_CUDA_TPL}"
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Cuda_UVM
KOKKOS_USE_CUDA_UVM
"Enable CUDA Unified Virtual Memory as the default in Kokkos."
OFF
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Cuda_RDC
KOKKOS_HAVE_CUDA_RDC
"Enable CUDA Relocatable Device Code support in Kokkos."
OFF
)
set(Kokkos_ENABLE_Cuda_Lambda_DEFAULT OFF)
if (Kokkos_ENABLE_Cuda)
if (KOKKOS_HAVE_CUDA_GEQ_75)
if (CMAKE_CXX_FLAGS MATCHES "-expt-extended-lambda")
set(Kokkos_ENABLE_Cuda_Lambda_DEFAULT ON)
message("-- CUDA version is >= 7.5 and CMAKE_CXX_FLAGS contains -expt-extended-lambda,")
message("-- Kokkos_ENABLE_Cuda_Lambda defaults to ON")
else()
message("-- CMAKE_CXX_FLAGS doesn't contain -expt-extended-lambda,")
message("-- Kokkos_ENABLE_Cuda_Lambda defaults to OFF")
endif()
else()
message("-- CUDA version is < 7.5, Kokkos_ENABLE_Cuda_Lambda defaults to OFF")
endif()
endif()
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Cuda_Lambda
KOKKOS_HAVE_CUDA_LAMBDA
"Enable CUDA LAMBDA support in Kokkos."
"${Kokkos_ENABLE_Cuda_Lambda_DEFAULT}"
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Pthread
KOKKOS_HAVE_PTHREAD
"Enable Pthread support in Kokkos."
OFF
)
ASSERT_DEFINED(TPL_ENABLE_Pthread)
IF(Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
ENDIF()
IF(NOT TPL_ENABLE_Pthread)
ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0)
ENDIF()
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_OpenMP
KOKKOS_HAVE_OPENMP
"Enable OpenMP support in Kokkos."
"${${PROJECT_NAME}_ENABLE_OpenMP}"
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_QTHREAD
KOKKOS_HAVE_QTHREADS
"Enable Qthreads support in Kokkos."
"${TPL_ENABLE_QTHREAD}"
)
# TODO: No longer an option in Kokkos. Needs to be removed.
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_CXX11
KOKKOS_HAVE_CXX11
"Enable C++11 support in Kokkos."
"${${PROJECT_NAME}_ENABLE_CXX11}"
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_HWLOC
KOKKOS_HAVE_HWLOC
"Enable HWLOC support in Kokkos."
"${TPL_ENABLE_HWLOC}"
)
# TODO: This is currently not used in Kokkos. Should it be removed?
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_MPI
KOKKOS_HAVE_MPI
"Enable MPI support in Kokkos."
"${TPL_ENABLE_MPI}"
)
# Set default value of Kokkos_ENABLE_Debug_Bounds_Check option
#
# CMake is case sensitive. The Kokkos_ENABLE_Debug_Bounds_Check
# option (defined below) is annoyingly not all caps, but we need to
# keep it that way for backwards compatibility. If users forget and
# try using an all-caps variable, then make it count by using the
# all-caps version as the default value of the original, not-all-caps
# option. Otherwise, the default value of this option comes from
# Kokkos_ENABLE_DEBUG (see Issue #367).
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_DEBUG)
IF(DEFINED Kokkos_ENABLE_DEBUG_BOUNDS_CHECK)
IF(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK)
SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT ON)
ELSE()
SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}")
ENDIF()
ELSE()
SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}")
ENDIF()
ASSERT_DEFINED(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Debug_Bounds_Check
KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
"Enable Kokkos::View run-time bounds checking."
"${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}"
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Debug_DualView_Modify_Check
KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
"Enable abort when Kokkos::DualView modified on host and device without sync."
"${Kokkos_ENABLE_DEBUG}"
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Profiling
KOKKOS_ENABLE_PROFILING
"Enable KokkosP profiling support for kernel data collections."
"${TPL_ENABLE_DLlib}"
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Profiling_Load_Print
KOKKOS_ENABLE_PROFILING_LOAD_PRINT
"Print to standard output which profiling library was loaded."
OFF
)
# placeholder for future device...
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Winthread
KOKKOS_HAVE_WINTHREAD
"Enable Winthread support in Kokkos."
"${TPL_ENABLE_Winthread}"
)
# TODO: No longer an option in Kokkos. Needs to be removed.
# use new/old View
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_USING_DEPRECATED_VIEW
KOKKOS_USING_DEPRECATED_VIEW
"Choose whether to use the old, deprecated Kokkos::View"
OFF
)
include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake)
# Sources come from makefile-generated kokkos_generated_settings.cmake file
# Enable using the individual sources if needed
set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})
endif ()
#------------------------------------------------------------------------------
@ -260,10 +122,6 @@ TRIBITS_PACKAGE_DEF()
TRIBITS_EXCLUDE_AUTOTOOLS_FILES()
TRIBITS_EXCLUDE_FILES(
classic/doc
classic/LinAlg/doc/CrsRefactorNotesMay2012
)
TRIBITS_PACKAGE_POSTPROCESS()
ENDIF()

View File

@ -28,33 +28,39 @@ KOKKOS_OPTIONS ?= ""
# Options: force_uvm,use_ldg,rdc,enable_lambda
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
# kokkos_has_string: return a 1 if a string contains a substring and 0 if not.
#   $1 - the string to search in
#   $2 - the substring to look for; note it should be given without '"'
# Matching is done with $(findstring), i.e. a plain, case-sensitive substring
# test, so a search string that is a prefix of a longer entry also matches.
# Example: with KOKKOS_USE_TPLS = "hwloc,librt"
#   $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
# will return a 1. Pass comma-separated lists through a variable as shown:
# a comma typed literally inside $(call ...) starts a new argument.
kokkos_has_string=$(if $(findstring $2,$1),1,0)
# Check for general settings.
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))
KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes)
KOKKOS_INTERNAL_ENABLE_CXX11 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++11)
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
# Check for external libraries.
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt)
KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind)
# Check for advanced settings.
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "compiler_warnings" | wc -l))
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l))
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l))
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg)
KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm)
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc)
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
# Check for Kokkos Host Execution Spaces one of which must be on.
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP)
KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread)
KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads)
KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
@ -65,9 +71,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
endif
# Check for other Execution Spaces.
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l))
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda)
KOKKOS_INTERNAL_USE_ROCM := $(call kokkos_has_string,$(KOKKOS_DEVICES),ROCm)
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
@ -77,25 +83,20 @@ endif
# Check OS.
KOKKOS_OS := $(strip $(shell uname -s))
KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname -s | grep CYGWIN | wc -l))
KOKKOS_INTERNAL_OS_LINUX := $(strip $(shell uname -s | grep Linux | wc -l))
KOKKOS_INTERNAL_OS_DARWIN := $(strip $(shell uname -s | grep Darwin | wc -l))
KOKKOS_INTERNAL_OS_CYGWIN := $(call kokkos_has_string,$(KOKKOS_OS),CYGWIN)
KOKKOS_INTERNAL_OS_LINUX := $(call kokkos_has_string,$(KOKKOS_OS),Linux)
KOKKOS_INTERNAL_OS_DARWIN := $(call kokkos_has_string,$(KOKKOS_OS),Darwin)
# Check compiler.
KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l))
KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l))
KOKKOS_CXX_VERSION := $(strip $(shell $(CXX) --version 2>&1))
KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Intel Corporation)
KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI)
KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l))
ifneq ($(OMPI_CXX),)
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep nvcc | wc -l))
endif
ifneq ($(MPICH_CXX),)
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep nvcc | wc -l))
endif
KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l))
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l))
KOKKOS_INTERNAL_COMPILER_HCC := $(strip $(shell $(CXX) --version 2>&1 | grep HCC | wc -l))
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l))
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
KOKKOS_INTERNAL_COMPILER_CLANG = 1
@ -209,47 +210,48 @@ endif
# Check for Kokkos Architecture settings.
# Intel based.
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
KOKKOS_INTERNAL_USE_ARCH_WSM := $(strip $(shell echo $(KOKKOS_ARCH) | grep WSM | wc -l))
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC)
KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX)
KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL)
# NVIDIA based.
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler30)
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler32)
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler35)
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler37)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell50)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell52)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell53)
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61)
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
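#SEK: This seems like a bug to me
# NOTE(review): presumably this refers to the fallback below -- when no specific
# NVIDIA arch matched, a generic "Maxwell" or "Kepler" in KOKKOS_ARCH is mapped
# to Maxwell50 / Kepler35. Confirm with the author.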
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
@ -262,43 +264,43 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
endif
endif
# ARM based.
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80)
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc))
# IBM based.
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7)
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9)
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l))
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l))
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l))
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l))
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l))
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),Epyc)
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri)
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo)
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega)
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx901)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc ))
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
# Decide whether we can support transactional memory
KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
@ -320,94 +322,100 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS)
endif
KOKKOS_LIBS = -lkokkos -ldl
KOKKOS_LIBS = -ldl
KOKKOS_LDFLAGS = -L$(shell pwd)
KOKKOS_SRC =
KOKKOS_HEADERS =
# Generating the KokkosCore_config.h file.
# The header is first assembled line by line in a temporary file
# (KOKKOS_INTERNAL_CONFIG_TMP); KOKKOS_CONFIG_HEADER names the final header.
KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp
KOKKOS_CONFIG_HEADER=KokkosCore_config.h
# Functions for generating config header file
# kokkos_append_header: $(call kokkos_append_header,TEXT) runs a shell
# `echo TEXT >> $(KOKKOS_INTERNAL_CONFIG_TMP)` while the makefile is parsed,
# i.e. it appends TEXT as one line to the temporary config file. TEXT is
# pasted into the shell command verbatim, so callers pass it already quoted.
kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))
# Do not append first line
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
tmp := $(shell date >> KokkosCore_config.tmp)
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
tmp := $(call kokkos_append_header,"$(shell date)")
tmp := $(call kokkos_append_header,"----------------------------------------------*/")
tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp)
tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp)
tmp := $(shell echo '\#else' >> KokkosCore_config.tmp)
tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp)
tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp)
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,'\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
tmp := $(call kokkos_append_header,'\#else')
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'\#endif')
tmp := $(call kokkos_append_header,"/* Execution Spaces */")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CUDA")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,'\#define KOKKOS_HAVE_OPENMP')
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_PTHREAD")
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_QTHREADS")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_SERIAL")
endif
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM")
tmp := $(call kokkos_append_header,"\#endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64")
tmp := $(call kokkos_append_header,"\#endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC")
tmp := $(call kokkos_append_header,"\#endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE")
tmp := $(call kokkos_append_header,"\#endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE")
tmp := $(call kokkos_append_header,"\#endif")
endif
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,"/* General Settings */")
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11")
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX1Z")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
@ -417,26 +425,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
KOKKOS_CXXFLAGS += -g
KOKKOS_LDFLAGS += -g -ldl
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_DEBUG")
ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
endif
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
KOKKOS_LIBS += -lhwloc
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HWLOC")
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
KOKKOS_LIBS += -lrt
endif
@ -444,36 +452,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
KOKKOS_LIBS += -lmemkind -lnuma
tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HBWSPACE")
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
endif
tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
endif
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
tmp := $(call kokkos_append_header,"/* Cuda Settings */")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC")
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC")
endif
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_UVM")
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE")
KOKKOS_CXXFLAGS += --relocatable-device-code=true
KOKKOS_LDFLAGS += --relocatable-device-code=true
endif
@ -481,7 +489,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA")
KOKKOS_CXXFLAGS += -expt-extended-lambda
else
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
@ -489,19 +497,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA")
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_CLANG_WORKAROUND")
endif
endif
# Add Architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
@ -518,7 +526,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
@ -535,8 +543,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
@ -553,7 +561,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_SSE42 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xSSE4.2
@ -575,7 +583,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx
@ -597,7 +605,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER7")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -609,7 +617,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -630,7 +638,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9")
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
@ -651,7 +659,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -673,7 +681,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX2
@ -695,7 +703,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xMIC-AVX512
@ -716,7 +724,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX512
@ -737,7 +745,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC")
KOKKOS_CXXFLAGS += -mmic
KOKKOS_LDFLAGS += -mmic
endif
@ -753,48 +761,48 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
endif
@ -811,28 +819,28 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
# Let's start by adding the architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 701")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KAVERI")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 801")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_CARRIZO")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 803")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_FIJI")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp )
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 901")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_GFX901")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
endif
@ -952,6 +960,10 @@ ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
endif
# Set KOKKOS_EXTRA_LIBS and add -lkokkos to the link line.
# KOKKOS_EXTRA_LIBS captures the libraries accumulated in KOKKOS_LIBS up to
# this point, i.e. everything except the Kokkos library itself. Both
# assignments use ':=' so the right-hand side is expanded immediately; the
# second one can therefore refer to the previous value of KOKKOS_LIBS while
# prepending -lkokkos to it.
KOKKOS_EXTRA_LIBS := ${KOKKOS_LIBS}
KOKKOS_LIBS := -lkokkos ${KOKKOS_LIBS}
# Setting up dependencies.
KokkosCore_config.h:

View File

@ -22,8 +22,8 @@ Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokk
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
Kokkos_Rendezvous.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp

View File

@ -41,48 +41,44 @@ hcedwar(at)sandia.gov and crtrott(at)sandia.gov
============================================================================
Primary tested compilers on X86 are:
GCC 4.7.2
GCC 4.8.4
GCC 4.9.2
GCC 4.9.3
GCC 5.1.0
GCC 5.2.0
Intel 14.0.4
GCC 5.3.0
GCC 6.1.0
Intel 15.0.2
Intel 16.0.1
Intel 17.0.098
Intel 17.1.132
Intel 17.1.043
Intel 17.4.196
Intel 18.0.128
Clang 3.5.2
Clang 3.6.1
Clang 3.7.1
Clang 3.8.1
Clang 3.9.0
PGI 17.1
Clang 4.0.0
Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44)
PGI 17.10
NVCC 7.0 for CUDA (with gcc 4.8.4)
NVCC 7.5 for CUDA (with gcc 4.8.4)
NVCC 8.0.44 for CUDA (with gcc 5.3.0)
Primary tested compilers on Power 8 are:
GCC 5.4.0 (OpenMP,Serial)
IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
IBM XL 13.1.5 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
NVCC 8.0.44 for CUDA (with gcc 5.4.0)
NVCC 9.0.103 for CUDA (with gcc 6.3.0)
Primary tested compilers on Intel KNL are:
GCC 6.2.0
Intel 16.2.181 (with gcc 4.7.2)
Intel 17.0.098 (with gcc 4.7.2)
Intel 17.1.132 (with gcc 4.9.3)
Intel 16.4.258 (with gcc 4.7.2)
Intel 17.2.174 (with gcc 4.9.3)
Intel 18.0.061 (beta) (with gcc 4.9.3)
Secondary tested compilers are:
CUDA 7.0 (with gcc 4.8.4)
CUDA 7.5 (with gcc 4.8.4)
CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8)
CUDA/Clang 8.0 using Clang/Trunk compiler
Intel 18.0.128 (with gcc 4.9.3)
Other compilers working:
X86:
Cygwin 2.1.0 64bit with gcc 4.9.3
Limited testing of the following compilers on POWER7+ systems:
GCC 4.8.5 (on RHEL7.1 POWER7+)
Known non-working combinations:
Power8:
Pthreads backend
@ -96,8 +92,8 @@ GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Secondary compilers are passing without -Werror.
Other compilers are tested occasionally, in particular when pushing from develop to
master branch, without -Werror and only for a select set of backends.

View File

@ -2,7 +2,9 @@
TRIBITS_SUBPACKAGE(Algorithms)
ADD_SUBDIRECTORY(src)
IF(KOKKOS_HAS_TRILINOS)
ADD_SUBDIRECTORY(src)
ENDIF()
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)

View File

@ -3,6 +3,32 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
# When Kokkos is not built as part of Trilinos, choose the library target
# name stored in TEST_LINK_TARGETS: 'kokkoscore' when KOKKOS_SEPARATE_LIBS
# is set, 'kokkos' otherwise. When KOKKOS_HAS_TRILINOS is set the variable
# is not assigned here.
IF(NOT KOKKOS_HAS_TRILINOS)
IF(KOKKOS_SEPARATE_LIBS)
set(TEST_LINK_TARGETS kokkoscore)
ELSE()
set(TEST_LINK_TARGETS kokkos)
ENDIF()
ENDIF()
SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
# mfh 03 Nov 2017: The gtest library used here must have a different
# name than that of the gtest library built in KokkosCore. We can't
# just refer to the library in KokkosCore's tests, because it's
# possible to build only (e.g.,) KokkosAlgorithms tests, without
# building KokkosCore tests.
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0")
TRIBITS_ADD_LIBRARY(
kokkosalgorithms_gtest
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
TESTONLY
)
SET(SOURCES
UnitTestMain.cpp
TestCuda.cpp
@ -34,5 +60,5 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkosalgorithms_gtest ${TEST_LINK_TARGETS}
)

View File

@ -15,7 +15,8 @@ endif
CXXFLAGS = -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
LDFLAGS ?=
override LDFLAGS += -lpthread
include $(KOKKOS_PATH)/Makefile.kokkos

View File

@ -0,0 +1,84 @@
#!/bin/bash
#
# Build the benchmark suite (BytesAndFlops, miniMD, miniFE) out of tree under
# ./build, one sub-directory per benchmark. All options take the form
# --name=value; run with --help for the list.
#
# Exit status: 0 if every benchmark built, 1 if at least one build failed.
# A failed build does not stop the remaining benchmarks from being built.

# ---- Default Settings -----

# Paths
KOKKOS_PATH=${PWD}/kokkos
# Parsed for completeness; none of the builds below uses it yet.
KOKKOS_KERNELS_PATH=${PWD}/kokkos-kernels
MINIMD_PATH=${PWD}/miniMD/kokkos
MINIFE_PATH=${PWD}/miniFE/kokkos

# Kokkos Configure Options
KOKKOS_DEVICES=OpenMP
KOKKOS_ARCH=SNB
# Value of --with-cuda-options; empty means "leave the Makefile default alone".
CUDA_OPTIONS_VALUE=

# Compiler Options
CXX=mpicxx
OPT_FLAG="-O3"

# Print the supported command line options.
print_help() {
  cat <<EOF
Usage: build_code.bash [options]
  --kokkos-path=PATH          Kokkos source tree        (default: \${PWD}/kokkos)
  --kokkos-kernels-path=PATH  KokkosKernels source tree (default: \${PWD}/kokkos-kernels)
  --minimd-path=PATH          miniMD Kokkos variant     (default: \${PWD}/miniMD/kokkos)
  --minife-path=PATH          miniFE Kokkos variant     (default: \${PWD}/miniFE/kokkos)
  --device-list=LIST          value for KOKKOS_DEVICES  (default: OpenMP)
  --arch=LIST                 value for KOKKOS_ARCH     (default: SNB)
  --opt-flag=FLAGS            value for CXXFLAGS        (default: -O3)
  --compiler=CXX              C++ compiler              (default: mpicxx)
  --with-cuda-options=LIST    value for KOKKOS_CUDA_OPTIONS
  --help                      print this message and exit
EOF
}

# ---- Command line parsing -----
# '-gt' is the numeric test; '>' inside [[ ]] would compare strings.
while [[ $# -gt 0 ]]
do
  key="$1"
  case "${key}" in
    --kokkos-path*)
      KOKKOS_PATH="${key#*=}"
      ;;
    --kokkos-kernels-path*)
      KOKKOS_KERNELS_PATH="${key#*=}"
      ;;
    --minimd-path*)
      MINIMD_PATH="${key#*=}"
      ;;
    --minife-path*)
      MINIFE_PATH="${key#*=}"
      ;;
    --device-list*)
      KOKKOS_DEVICES="${key#*=}"
      ;;
    --arch*)
      # Store the bare value: it is handed to make as KOKKOS_ARCH=<value>,
      # so it must not carry the "--arch=" option prefix.
      KOKKOS_ARCH="${key#*=}"
      ;;
    --opt-flag*)
      OPT_FLAG="${key#*=}"
      ;;
    --compiler*)
      CXX="${key#*=}"
      ;;
    --with-cuda-options*)
      CUDA_OPTIONS_VALUE="${key#*=}"
      ;;
    --help*)
      print_help
      exit 0
      ;;
    *)
      # args, just append
      ARGS="$ARGS $1"
      ;;
  esac
  shift
done

# ---- Build -----
# Variable overrides shared by every make invocation. Keeping them in an
# array preserves values that contain spaces (e.g. --opt-flag="-O3 -g").
MAKE_ARGS=(
  "KOKKOS_ARCH=${KOKKOS_ARCH}"
  "KOKKOS_DEVICES=${KOKKOS_DEVICES}"
  "CXX=${CXX}"
  "KOKKOS_PATH=${KOKKOS_PATH}"
  "CXXFLAGS=${OPT_FLAG}"
)
if [[ -n "${CUDA_OPTIONS_VALUE}" ]]; then
  MAKE_ARGS+=("KOKKOS_CUDA_OPTIONS=${CUDA_OPTIONS_VALUE}")
fi

# build_benchmark NAME MAKEFILE
# Creates build/NAME if needed and runs MAKEFILE from inside it. The cd
# happens in a subshell so the caller's working directory never changes and
# make is never started in the wrong directory.
build_benchmark() {
  local name="$1"
  local makefile="$2"
  mkdir -p "build/${name}" || return 1
  (
    cd "build/${name}" || exit 1
    make "${MAKE_ARGS[@]}" -f "${makefile}" -j 16
  )
}

status=0

# Build BytesAndFlops
build_benchmark bytes_and_flops "${KOKKOS_PATH}/benchmarks/bytes_and_flops/Makefile" || status=1

# Build miniMD
build_benchmark miniMD "${MINIMD_PATH}/Makefile" || status=1

# Build miniFE
build_benchmark miniFE "${MINIFE_PATH}/src/Makefile" || status=1

exit "${status}"

View File

@ -0,0 +1,37 @@
#!/bin/bash
#
# Clone (when missing) and update the repositories used by the benchmark
# suite into the current directory: kokkos (develop branch), kokkos-kernels,
# miniMD and miniFE.
#
# Exit status: 0 if every repository was updated, 1 otherwise. A failure in
# one repository does not prevent the others from being processed.

# update_repo DIR URL [BRANCH]
# Clones URL into DIR if DIR does not exist, optionally checks out BRANCH,
# then pulls. All git commands that need to run inside DIR do so in a
# subshell: if entering DIR fails nothing is executed in the wrong place, and
# the caller's working directory is left untouched either way.
update_repo() {
  local dir="$1"
  local url="$2"
  local branch="${3:-}"

  if [ ! -d "${dir}" ]; then
    git clone "${url}" "${dir}" || return 1
  fi

  (
    cd "${dir}" || exit 1
    if [ -n "${branch}" ]; then
      git checkout "${branch}"
    fi
    git pull
  )
}

status=0

# Kokkos
update_repo kokkos https://github.com/kokkos/kokkos develop || status=1

# KokkosKernels
update_repo kokkos-kernels https://github.com/kokkos/kokkos-kernels || status=1

# MiniMD
update_repo miniMD https://github.com/mantevo/miniMD || status=1

# MiniFE
update_repo miniFE https://github.com/mantevo/miniFE || status=1

exit "${status}"

View File

@ -0,0 +1,14 @@
#!/bin/bash
#
# Driver for the benchmark suite: checks out the repositories, builds the
# benchmarks and runs them, using the helper scripts found in PATH_TO_SCRIPTS.
#
# Arguments:
#   $1  PATH_TO_SCRIPTS  directory containing checkout_repos.bash,
#                        build_code.bash and run_tests.bash
#   $2  KOKKOS_DEVICES   forwarded to build_code.bash as --device-list
#   $3  KOKKOS_ARCH      forwarded to build_code.bash as --arch
#   $4  COMPILER         forwarded to build_code.bash as --compiler

# '-lt' compares numerically; '<' inside [[ ]] compares strings, which would
# treat e.g. 10 arguments as "less than 4".
if [[ $# -lt 4 ]]; then
  echo "Usage: ./run_benchmark.bash PATH_TO_SCRIPTS KOKKOS_DEVICES KOKKOS_ARCH COMPILER" >&2
  exit 1
fi

SCRIPT_PATH=$1
KOKKOS_DEVICES=$2
KOKKOS_ARCH=$3
COMPILER=$4

# Quote every expansion so paths and lists containing spaces stay intact.
"${SCRIPT_PATH}/checkout_repos.bash"
"${SCRIPT_PATH}/build_code.bash" "--arch=${KOKKOS_ARCH}" "--device-list=${KOKKOS_DEVICES}" "--compiler=${COMPILER}"
"${SCRIPT_PATH}/run_tests.bash"

View File

@ -0,0 +1,44 @@
#!/bin/bash
#
# Run the benchmarks built under ./build and print, for each of them, two
# performance figures normalized by the reference constants hard-coded below,
# followed by the average of all six figures.
#
# Reads OMP_NUM_THREADS for the miniMD thread count (defaults to 1 if unset).
# Must be started from the directory that contains ./build and ./miniMD.

# Thread count handed to miniMD via -t. Without a default an unset
# OMP_NUM_THREADS would leave -t without an argument.
NUM_THREADS="${OMP_NUM_THREADS:-1}"

# enter_dir DIR: cd into DIR or abort, so that no benchmark command is ever
# executed from the wrong directory.
enter_dir() {
  cd "$1" || { echo "run_tests.bash: cannot enter $1" >&2; exit 1; }
}

# BytesAndFlops
enter_dir build/bytes_and_flops

# Detect a CUDA build from the generated config header. The pattern accepts
# the define both with a value ("#define KOKKOS_HAVE_CUDA 1") and without one
# ("#define KOKKOS_HAVE_CUDA"), and deliberately does not match unrelated
# lines such as "#ifndef __CUDA_ARCH__".
USE_CUDA=$(grep -c -E '^#define KOKKOS_(HAVE|ENABLE)_CUDA( |$)' KokkosCore_config.h)

# '-gt' is the numeric test; '>' inside [[ ]] would compare strings.
if [[ ${USE_CUDA:-0} -gt 0 ]]; then
  BAF_EXE=bytes_and_flops.cuda
  TEAM_SIZE=256
else
  BAF_EXE=bytes_and_flops.host
  TEAM_SIZE=1
fi

BAF_PERF_1=$(./${BAF_EXE} 2 100000 1024 1 1 1 1 ${TEAM_SIZE} 6000 | awk '{print $12/174.5}')
BAF_PERF_2=$(./${BAF_EXE} 2 100000 1024 16 1 8 64 ${TEAM_SIZE} 6000 | awk '{print $14/1142.65}')

echo "BytesAndFlops: ${BAF_PERF_1} ${BAF_PERF_2}"
cd ../..

# MiniMD
enter_dir build/miniMD
cp ../../miniMD/kokkos/Cu_u6.eam ./
MD_PERF_1=$(./miniMD --half_neigh 0 -s 60 --ntypes 1 -t "${NUM_THREADS}" -i ../../miniMD/kokkos/in.eam.miniMD | grep PERF_SUMMARY | awk '{print $10/21163341}')
MD_PERF_2=$(./miniMD --half_neigh 0 -s 20 --ntypes 1 -t "${NUM_THREADS}" -i ../../miniMD/kokkos/in.eam.miniMD | grep PERF_SUMMARY | awk '{print $10/13393417}')

echo "MiniMD: ${MD_PERF_1} ${MD_PERF_2}"
cd ../..

# MiniFE
enter_dir build/miniFE
# Remove reports of earlier runs so the grep below sees exactly one file.
# -f keeps the first run (no .yaml present yet) from printing an error.
rm -f -- *.yaml
./miniFE.x -nx 100 &> /dev/null
FE_PERF_1=$(grep "CG Mflop" *.yaml | awk '{print $4/14174}')
rm -f -- *.yaml
./miniFE.x -nx 50 &> /dev/null
FE_PERF_2=$(grep "CG Mflop" *.yaml | awk '{print $4/11897}')
cd ../..

echo "MiniFE: ${FE_PERF_1} ${FE_PERF_2}"

# Average of the six normalized figures.
PERF_RESULT=$(echo "${BAF_PERF_1} ${BAF_PERF_2} ${MD_PERF_1} ${MD_PERF_2} ${FE_PERF_1} ${FE_PERF_2}" | awk '{print ($1+$2+$3+$4+$5+$6)/6}')
echo "Total Result: " ${PERF_RESULT}

View File

@ -1,7 +1,18 @@
KOKKOS_PATH = ${HOME}/kokkos
SRC = $(wildcard *.cpp)
KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda
KOKKOS_ARCH = "SNB,Kepler35"
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
ifndef KOKKOS_PATH
KOKKOS_PATH = $(MAKEFILE_PATH)../..
endif
SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
vpath %.cpp $(sort $(dir $(SRC)))
default: build
echo "Start Build"
@ -9,22 +20,19 @@ default: build
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
EXE = bytes_and_flops.cuda
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
EXE = bytes_and_flops.host
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
CXXFLAGS = -O3 -g
CXXFLAGS ?= -O3 -g
override CXXFLAGS += -I$(MAKEFILE_PATH)
DEPFLAGS = -M
LINK = ${CXX}
LINKFLAGS =
OBJ = $(SRC:.cpp=.o)
OBJ = $(notdir $(SRC:.cpp=.o))
LIB =
include $(KOKKOS_PATH)/Makefile.kokkos
@ -39,5 +47,5 @@ clean: kokkos-clean
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)

View File

@ -69,11 +69,11 @@ void test_policy(int team_range, int thread_range, int vector_range,
int team_size, int vector_size, int test_type,
ViewType1 &v1, ViewType2 &v2, ViewType3 &v3,
double &result, double &result_expect, double &time) {
typedef Kokkos::TeamPolicy<ScheduleType,IndexType> t_policy;
typedef typename t_policy::member_type t_team;
Kokkos::Timer timer;
for(int orep = 0; orep<outer_repeat; orep++) {
if (test_type == 100) {
@ -95,7 +95,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
v2( idx, t ) = t;
// prevent compiler optimizing loop away
});
}
}
});
}
if (test_type == 111) {
@ -178,12 +178,13 @@ void test_policy(int team_range, int thread_range, int vector_range,
for (int tr = 0; tr<thread_repeat; ++tr) {
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double &lval) {
double vector_result = 0.0;
for (int vr = 0; vr<inner_repeat; ++vr)
for (int vr = 0; vr<inner_repeat; ++vr) {
vector_result = 0.0;
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,vector_range), [&] (const int vi, double &vval) {
vval += 1;
}, vector_result);
lval += vector_result;
}
}, team_result);
}
v1(idx) = team_result;
@ -191,7 +192,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
});
}
if (test_type == 200) {
Kokkos::parallel_reduce("200 outer reduce", t_policy(team_range,team_size),
Kokkos::parallel_reduce("200 outer reduce", t_policy(team_range,team_size),
KOKKOS_LAMBDA (const t_team& team, double& lval) {
lval+=team.team_size()*team.league_rank() + team.team_rank();
},result);
@ -315,7 +316,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
// parallel_for RangePolicy: range = team_size*team_range
if (test_type == 300) {
Kokkos::parallel_for("300 outer for", team_size*team_range,
Kokkos::parallel_for("300 outer for", team_size*team_range,
KOKKOS_LAMBDA (const int idx) {
v1(idx) = idx;
// prevent compiler from optimizing away the loop
@ -323,7 +324,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
}
// parallel_reduce RangePolicy: range = team_size*team_range
if (test_type == 400) {
Kokkos::parallel_reduce("400 outer reduce", team_size*team_range,
Kokkos::parallel_reduce("400 outer reduce", team_size*team_range,
KOKKOS_LAMBDA (const int idx, double& val) {
val += idx;
}, result);
@ -331,7 +332,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
}
// parallel_scan RangePolicy: range = team_size*team_range
if (test_type == 500) {
Kokkos::parallel_scan("500 outer scan", team_size*team_range,
Kokkos::parallel_scan("500 outer scan", team_size*team_range,
ParallelScanFunctor<ViewType1>(v1)
#if 0
// This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation

View File

@ -26,6 +26,7 @@ fi
# Get parent cpuset
HPCBIND_HWLOC_PARENT_CPUSET=""
if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
HPCBIND_HWLOC_VERSION="$(hwloc-ls --version | cut -d ' ' -f 2)"
MY_PID="$BASHPID"
HPCBIND_HWLOC_PARENT_CPUSET="$(hwloc-ps -a --cpuset | grep ${MY_PID} | cut -f 2)"
fi
@ -45,8 +46,11 @@ declare -i NUM_GPUS=0
HPCBIND_VISIBLE_GPUS=""
if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then
NUM_GPUS=$(nvidia-smi -L | wc -l);
GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )"
HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}}
HPCBIND_HAS_NVIDIA=$((!$?))
if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then
GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )"
HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}}
fi
fi
declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0))
@ -57,33 +61,38 @@ declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0))
# supports sbatch, bsub, aprun
################################################################################
HPCBIND_QUEUE_NAME=""
declare -i HPCBIND_QUEUE_INDEX=0
declare -i HPCBIND_QUEUE_RANK=0
declare -i HPCBIND_QUEUE_SIZE=0
declare -i HPCBIND_QUEUE_MAPPING=0
if [[ ! -z "${PMI_RANK}" ]]; then
HPCBIND_QUEUE_MAPPING=1
HPCBIND_QUEUE_NAME="mpich"
HPCBIND_QUEUE_INDEX=${PMI_RANK}
HPCBIND_QUEUE_RANK=${PMI_RANK}
HPCBIND_QUEUE_SIZE=${PMI_SIZE}
elif [[ ! -z "${OMPI_COMM_WORLD_RANK}" ]]; then
HPCBIND_QUEUE_MAPPING=1
HPCBIND_QUEUE_NAME="openmpi"
HPCBIND_QUEUE_INDEX=${OMPI_COMM_WORLD_RANK}
HPCBIND_QUEUE_RANK=${OMPI_COMM_WORLD_RANK}
HPCBIND_QUEUE_SIZE=${OMPI_COMM_WORLD_SIZE}
elif [[ ! -z "${MV2_COMM_WORLD_RANK}" ]]; then
HPCBIND_QUEUE_MAPPING=1
HPCBIND_QUEUE_NAME="mvapich2"
HPCBIND_QUEUE_INDEX=${MV2_COMM_WORLD_RANK}
HPCBIND_QUEUE_RANK=${MV2_COMM_WORLD_RANK}
HPCBIND_QUEUE_SIZE=${MV2_COMM_WORLD_SIZE}
elif [[ ! -z "${SLURM_LOCAL_ID}" ]]; then
HPCBIND_QUEUE_MAPPING=1
HPCBIND_QUEUE_NAME="slurm"
HPCBIND_QUEUE_INDEX=${SLURM_LOCAL_ID}
elif [[ ! -z "${LBS_JOBINDEX}" ]]; then
HPCBIND_QUEUE_MAPPING=1
HPCBIND_QUEUE_NAME="bsub"
HPCBIND_QUEUE_INDEX=${LBS_JOBINDEX}
HPCBIND_QUEUE_RANK=${SLURM_PROCID}
HPCBIND_QUEUE_SIZE=${SLURM_NPROCS}
elif [[ ! -z "${ALPS_APP_PE}" ]]; then
HPCBIND_QUEUE_MAPPING=1
HPCBIND_QUEUE_NAME="aprun"
HPCBIND_QUEUE_INDEX=${ALPS_APP_PE}
HPCBIND_QUEUE_RANK=${ALPS_APP_PE}
elif [[ ! -z "${LBS_JOBINDEX}" ]]; then
HPCBIND_QUEUE_MAPPING=1
HPCBIND_QUEUE_NAME="bsub"
HPCBIND_QUEUE_RANK=${LBS_JOBINDEX}
fi
################################################################################
@ -113,8 +122,8 @@ function show_help {
echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES"
echo " --openmp=M.m Set env variables for the given OpenMP version"
echo " Default: 4.0"
echo " --openmp-percent=N Integer percentage of cpuset to use for OpenMP"
echo " threads Default: 100"
echo " --openmp-ratio=N/D Ratio of the cpuset to use for OpenMP"
echo " Default: 1"
echo " --openmp-places=<Op> Op=threads|cores|sockets. Default: threads"
echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES"
echo " --force-openmp-num-threads=N"
@ -123,8 +132,8 @@ function show_help {
echo " Override logic for selecting OMP_PROC_BIND"
echo " --no-openmp-nested Set OMP_NESTED to false"
echo " --output-prefix=<P> Save the output to files of the form"
echo " P-N.log, P-N.out and P-N.err where P is the prefix"
echo " and N is the queue index or mpi rank (no spaces)"
echo " P.hpcbind.N, P.stdout.N and P.stderr.N where P is "
echo " the prefix and N is the rank (no spaces)"
echo " --output-mode=<Op> How console output should be handled."
echo " Options are all, rank0, and none. Default: rank0"
echo " --lstopo Show bindings in lstopo"
@ -132,20 +141,27 @@ function show_help {
echo " -h|--help Show this message"
echo ""
echo "Sample Usage:"
echo ""
echo " Split the current process cpuset into 4 and use the 3rd partition"
echo " ${cmd} --distribute=4 --distribute-partition=2 -v -- command ..."
echo ""
echo " Launch 16 jobs over 4 nodes with 4 jobs per node using only the even pus"
echo " and save the output to rank specific files"
echo " mpiexec -N 16 -npernode 4 ${cmd} --whole-system --proc-bind=pu:even \\"
echo " --distribute=4 -v --output-prefix=output -- command ..."
echo ""
echo " Bind the process to all even cores"
echo " ${cmd} --proc-bind=core:even -v -- command ..."
echo ""
# Sample: combine per-socket selections in a single --proc-bind argument
echo " Bind the even cores of socket 0 and the odd cores of socket 1"
echo " ${cmd} --proc-bind='socket:0.core:even socket:1.core:odd' -v -- command ..."
echo ""
echo " Skip GPU 0 when mapping visible devices"
echo " ${cmd} --distribute=4 --distribute-partition=0 --visible-gpus=1,2 -v -- command ..."
echo ""
echo " Display the current bindings"
echo " ${cmd} --proc-bind=numa:0 -- command"
echo ""
echo " Display the current bindings using lstopo"
echo " ${cmd} --proc-bind=numa:0.core:odd --lstopo"
echo ""
@ -167,12 +183,13 @@ declare -i HPCBIND_DISTRIBUTE=1
declare -i HPCBIND_PARTITION=-1
HPCBIND_PROC_BIND="all"
HPCBIND_OPENMP_VERSION=4.0
declare -i HPCBIND_OPENMP_PERCENT=100
declare -i HPCBIND_OPENMP_RATIO_NUMERATOR=1
declare -i HPCBIND_OPENMP_RATIO_DENOMINATOR=1
HPCBIND_OPENMP_PLACES=${OMP_PLACES:-threads}
declare -i HPCBIND_OPENMP_PROC_BIND=1
declare -i HPCBIND_OPENMP_FORCE_NUM_THREADS=-1
HPCBIND_OPENMP_FORCE_NUM_THREADS=""
HPCBIND_OPENMP_FORCE_PROC_BIND=""
HPCBIND_OPENMP_NESTED=${OMP_NESTED:-true}
declare -i HPCBIND_OPENMP_NESTED=1
declare -i HPCBIND_VERBOSE=0
declare -i HPCBIND_LSTOPO=0
@ -199,6 +216,9 @@ for i in "$@"; do
;;
--distribute=*)
HPCBIND_DISTRIBUTE="${i#*=}"
if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then
HPCBIND_DISTRIBUTE=1
fi
shift
;;
# which partition to use
@ -222,8 +242,18 @@ for i in "$@"; do
HPCBIND_OPENMP_VERSION="${i#*=}"
shift
;;
--openmp-percent=*)
HPCBIND_OPENMP_PERCENT="${i#*=}"
--openmp-ratio=*)
IFS=/ read HPCBIND_OPENMP_RATIO_NUMERATOR HPCBIND_OPENMP_RATIO_DENOMINATOR <<< "${i#*=}"
if [[ ${HPCBIND_OPENMP_RATIO_NUMERATOR} -le 0 ]]; then
HPCBIND_OPENMP_RATIO_NUMERATOR=1
fi
if [[ ${HPCBIND_OPENMP_RATIO_DENOMINATOR} -le 0 ]]; then
HPCBIND_OPENMP_RATIO_DENOMINATOR=1
fi
if [[ ${HPCBIND_OPENMP_RATIO_NUMERATOR} -gt ${HPCBIND_OPENMP_RATIO_DENOMINATOR} ]]; then
HPCBIND_OPENMP_RATIO_NUMERATOR=1
HPCBIND_OPENMP_RATIO_DENOMINATOR=1
fi
shift
;;
--openmp-places=*)
@ -243,7 +273,7 @@ for i in "$@"; do
shift
;;
--no-openmp-nested)
HPCBIND_OPENMP_NESTED="false"
HPCBIND_OPENMP_NESTED=0
shift
;;
--output-prefix=*)
@ -292,7 +322,7 @@ if [[ "${HPCBIND_OUTPUT_MODE}" == "none" ]]; then
HPCBIND_TEE=0
elif [[ "${HPCBIND_OUTPUT_MODE}" == "all" ]]; then
HPCBIND_TEE=1
elif [[ ${HPCBIND_QUEUE_INDEX} -eq 0 ]]; then
elif [[ ${HPCBIND_QUEUE_RANK} -eq 0 ]]; then
#default to rank0 printing to screen
HPCBIND_TEE=1
fi
@ -303,9 +333,18 @@ if [[ "${HPCBIND_OUTPUT_PREFIX}" == "" ]]; then
HPCBIND_ERR=/dev/null
HPCBIND_OUT=/dev/null
else
HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_INDEX}"
HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_INDEX}"
HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_INDEX}"
if [[ ${HPCBIND_QUEUE_SIZE} -gt 0 ]]; then
HPCBIND_STR_QUEUE_SIZE="${HPCBIND_QUEUE_SIZE}"
HPCBIND_STR_QUEUE_RANK=$(printf %0*d ${#HPCBIND_STR_QUEUE_SIZE} ${HPCBIND_QUEUE_RANK})
HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_STR_QUEUE_RANK}"
HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_STR_QUEUE_RANK}"
HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_STR_QUEUE_RANK}"
else
HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_RANK}"
HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_RANK}"
HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_RANK}"
fi
> ${HPCBIND_LOG}
fi
@ -333,27 +372,12 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then
NUM_GPUS=${#HPCBIND_VISIBLE_GPUS[@]}
fi
################################################################################
# Check OpenMP percent
################################################################################
if [[ ${HPCBIND_OPENMP_PERCENT} -lt 1 ]]; then
HPCBIND_OPENMP_PERCENT=1
elif [[ ${HPCBIND_OPENMP_PERCENT} -gt 100 ]]; then
HPCBIND_OPENMP_PERCENT=100
fi
################################################################################
# Check distribute
################################################################################
if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then
HPCBIND_DISTRIBUTE=1
fi
################################################################################
#choose the correct partition
################################################################################
if [[ ${HPCBIND_PARTITION} -lt 0 && ${HPCBIND_QUEUE_MAPPING} -eq 1 ]]; then
HPCBIND_PARTITION=${HPCBIND_QUEUE_INDEX}
HPCBIND_PARTITION=${HPCBIND_QUEUE_RANK}
elif [[ ${HPCBIND_PARTITION} -lt 0 ]]; then
HPCBIND_PARTITION=0
fi
@ -381,23 +405,40 @@ if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
else
HPCBIND_HWLOC_CPUSET="${BINDING}"
fi
HPCBIND_NUM_PUS=$(hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu | wc -l)
HPCBIND_NUM_PUS=$(hwloc-calc -q -N pu ${HPCBIND_HWLOC_CPUSET} )
if [ $? -ne 0 ]; then
HPCBIND_NUM_PUS=1
fi
HPCBIND_NUM_CORES=$(hwloc-calc -q -N core ${HPCBIND_HWLOC_CPUSET} )
if [ $? -ne 0 ]; then
HPCBIND_NUM_CORES=1
fi
HPCBIND_NUM_NUMAS=$(hwloc-calc -q -N numa ${HPCBIND_HWLOC_CPUSET} )
if [ $? -ne 0 ]; then
HPCBIND_NUM_NUMAS=1
fi
HPCBIND_NUM_SOCKETS=$(hwloc-calc -q -N socket ${HPCBIND_HWLOC_CPUSET} )
if [ $? -ne 0 ]; then
HPCBIND_NUM_SOCKETS=1
fi
else
HPCBIND_NUM_PUS=$(cat /proc/cpuinfo | grep -c processor)
HPCBIND_NUM_CORES=${HPCBIND_NUM_PUS}
HPCBIND_NUM_NUMAS=1
HPCBIND_NUM_SOCKETS=1
fi
declare -i HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_NUM_PUS * HPCBIND_OPENMP_PERCENT))
HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_OPENMP_NUM_THREADS / 100))
if [[ ${HPCBIND_OPENMP_NUM_THREADS} -lt 1 ]]; then
HPCBIND_OPENMP_NUM_THREADS=1
elif [[ ${HPCBIND_OPENMP_NUM_THREADS} -gt ${HPCBIND_NUM_PUS} ]]; then
HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS}
fi
if [[ ${HPCBIND_OPENMP_FORCE_NUM_THREADS} -gt 0 ]]; then
if [[ ${HPCBIND_OPENMP_FORCE_NUM_THREADS} != "" ]]; then
HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_OPENMP_FORCE_NUM_THREADS}
else
declare -i HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_NUM_PUS * HPCBIND_OPENMP_RATIO_NUMERATOR / HPCBIND_OPENMP_RATIO_DENOMINATOR))
if [[ ${HPCBIND_OPENMP_NUM_THREADS} -lt 1 ]]; then
HPCBIND_OPENMP_NUM_THREADS=1
elif [[ ${HPCBIND_OPENMP_NUM_THREADS} -gt ${HPCBIND_NUM_PUS} ]]; then
HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS}
fi
fi
################################################################################
@ -405,7 +446,11 @@ fi
################################################################################
# set OMP_NUM_THREADS
export OMP_NUM_THREADS=${HPCBIND_OPENMP_NUM_THREADS}
if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then
export OMP_NUM_THREADS="${HPCBIND_OPENMP_NUM_THREADS},1"
else
export OMP_NUM_THREADS=${HPCBIND_OPENMP_NUM_THREADS}
fi
# set OMP_PROC_BIND and OMP_PLACES
if [[ ${HPCBIND_OPENMP_PROC_BIND} -eq 1 ]]; then
@ -413,7 +458,11 @@ if [[ ${HPCBIND_OPENMP_PROC_BIND} -eq 1 ]]; then
#default proc bind logic
if [[ "${HPCBIND_OPENMP_VERSION}" == "4.0" || "${HPCBIND_OPENMP_VERSION}" > "4.0" ]]; then
export OMP_PLACES="${HPCBIND_OPENMP_PLACES}"
export OMP_PROC_BIND="spread"
if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then
export OMP_PROC_BIND="spread,spread"
else
export OMP_PROC_BIND="spread"
fi
else
export OMP_PROC_BIND="true"
unset OMP_PLACES
@ -429,9 +478,17 @@ else
unset OMP_PROC_BIND
fi
# set OMP_NESTED
export OMP_NESTED=${HPCBIND_OPENMP_NESTED}
# set up hot teams (intel specific)
if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then
export OMP_NESTED="true"
export OMP_MAX_ACTIVE_LEVELS=2
export KMP_HOT_TEAMS=1
export KMP_HOT_TEAMS_MAX_LEVEL=2
else
export OMP_NESTED="false"
fi
# set OMP_NESTED
################################################################################
# Set CUDA environment variables
@ -442,7 +499,7 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then
declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS))
export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}"
else
declare -i MY_TASK_ID=$((HPCBIND_QUEUE_INDEX * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION))
declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION))
declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS))
export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}"
fi
@ -451,12 +508,17 @@ fi
################################################################################
# Set hpcbind environment variables
################################################################################
export HPCBIND_HWLOC_VERSION=${HPCBIND_HWLOC_VERSION}
export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC}
export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA}
export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS}
export HPCBIND_NUM_CORES=${HPCBIND_NUM_CORES}
export HPCBIND_NUM_NUMAS=${HPCBIND_NUM_NUMAS}
export HPCBIND_NUM_SOCKETS=${HPCBIND_NUM_SOCKETS}
export HPCBIND_HWLOC_CPUSET="${HPCBIND_HWLOC_CPUSET}"
export HPCBIND_HWLOC_DISTRIBUTE=${HPCBIND_DISTRIBUTE}
export HPCBIND_HWLOC_DISTRIBUTE_PARTITION=${HPCBIND_PARTITION}
export HPCBIND_OPENMP_RATIO="${HPCBIND_OPENMP_RATIO_NUMERATOR}/${HPCBIND_OPENMP_RATIO_DENOMINATOR}"
if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then
export HPCBIND_HWLOC_PARENT_CPUSET="all"
else
@ -467,7 +529,8 @@ export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING}
export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',')
export HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}"
if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then
export HPCBIND_QUEUE_INDEX=${HPCBIND_QUEUE_INDEX}
export HPCBIND_QUEUE_RANK=${HPCBIND_QUEUE_RANK}
export HPCBIND_QUEUE_SIZE=${HPCBIND_QUEUE_SIZE}
export HPCBIND_QUEUE_NAME="${HPCBIND_QUEUE_NAME}"
export HPCBIND_QUEUE_MAPPING=${HPCBIND_QUEUE_MAPPING}
fi
@ -487,10 +550,16 @@ if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then
echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG}
echo "[OPENMP]" >> ${HPCBIND_LOG}
echo "${TMP_ENV}" | grep -E "^OMP_" >> ${HPCBIND_LOG}
echo "[GOMP] (gcc, g++, and gfortran)" >> ${HPCBIND_LOG}
echo "${TMP_ENV}" | grep -E "^GOMP_" >> ${HPCBIND_LOG}
echo "[KMP] (icc, icpc, and ifort)" >> ${HPCBIND_LOG}
echo "${TMP_ENV}" | grep -E "^KMP_" >> ${HPCBIND_LOG}
echo "[XLSMPOPTS] (xlc, xlc++, and xlf)" >> ${HPCBIND_LOG}
echo "${TMP_ENV}" | grep -E "^XLSMPOPTS" >> ${HPCBIND_LOG}
if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
echo "[BINDINGS]" >> ${HPCBIND_LOG}
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu >> ${HPCBIND_LOG}
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" >> ${HPCBIND_LOG}
else
echo "Unable to show bindings, hwloc not available." >> ${HPCBIND_LOG}
fi
@ -503,10 +572,16 @@ else
echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG})
echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG})
echo "${TMP_ENV}" | grep -E "^OMP_" > >(tee -a ${HPCBIND_LOG})
echo "[GOMP] (gcc, g++, and gfortran)" > >(tee -a ${HPCBIND_LOG})
echo "${TMP_ENV}" | grep -E "^GOMP_" > >(tee -a ${HPCBIND_LOG})
echo "[KMP] (icc, icpc, and ifort)" > >(tee -a ${HPCBIND_LOG})
echo "${TMP_ENV}" | grep -E "^KMP_" > >(tee -a ${HPCBIND_LOG})
echo "[XLSMPOPTS] (xlc, xlc++, and xlf)" > >(tee -a ${HPCBIND_LOG})
echo "${TMP_ENV}" | grep -E "^XLSMPOPTS" > >(tee -a ${HPCBIND_LOG})
if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
echo "[BINDINGS]" > >(tee -a ${HPCBIND_LOG})
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu > >(tee -a ${HPCBIND_LOG})
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --no-io --no-bridges > >(tee -a ${HPCBIND_LOG})
else
echo "Unable to show bindings, hwloc not available." > >(tee -a ${HPCBIND_LOG})
fi

View File

@ -39,6 +39,12 @@ cuda_args=""
# Arguments for both NVCC and Host compiler
shared_args=""
# Argument -c
compile_arg=""
# Argument -o <obj>
output_arg=""
# Linker arguments
xlinker_args=""
@ -66,6 +72,7 @@ dry_run=0
# Skip NVCC compilation and use host compiler directly
host_only=0
host_only_args=""
# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0
@ -81,6 +88,11 @@ optimization_applied=0
# Check if we have -std=c++X or --std=c++X already
stdcxx_applied=0
# Run nvcc a second time to generate dependencies if needed
depfile_separate=0
depfile_output_arg=""
depfile_target_arg=""
#echo "Arguments: $# $@"
while [ $# -gt 0 ]
@ -112,12 +124,31 @@ do
fi
;;
#Handle shared args (valid for both nvcc and the host compiler)
-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
-D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
shared_args="$shared_args $1"
;;
#Handle shared args that have an argument
-o|-MT)
shared_args="$shared_args $1 $2"
#Handle compilation argument
-c)
compile_arg="$1"
;;
#Handle output argument
-o)
output_arg="$output_arg $1 $2"
shift
;;
# Handle depfile arguments. We map them to a separate call to nvcc.
-MD|-MMD)
depfile_separate=1
host_only_args="$host_only_args $1"
;;
-MF)
depfile_output_arg="-o $2"
host_only_args="$host_only_args $1 $2"
shift
;;
-MT)
depfile_target_arg="$1 $2"
host_only_args="$host_only_args $1 $2"
shift
;;
#Handle known nvcc args
@ -242,7 +273,7 @@ if [ $first_xcompiler_arg -eq 0 ]; then
fi
#Compose host only command
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
if [ $replace_pragma_ident -eq 1 ]; then
@ -274,10 +305,21 @@ else
host_command="$host_command $object_files"
fi
if [ $depfile_separate -eq 1 ]; then
# run nvcc a second time to generate dependencies (without compiling)
nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg"
else
nvcc_depfile_command=""
fi
nvcc_command="$nvcc_command $compile_arg $output_arg"
#Print command for dryrun
if [ $dry_run -eq 1 ]; then
if [ $host_only -eq 1 ]; then
echo $host_command
elif [ -n "$nvcc_depfile_command" ]; then
echo $nvcc_command "&&" $nvcc_depfile_command
else
echo $nvcc_command
fi
@ -287,6 +329,8 @@ fi
#Run compilation command
if [ $host_only -eq 1 ]; then
$host_command
elif [ -n "$nvcc_depfile_command" ]; then
$nvcc_command && $nvcc_depfile_command
else
$nvcc_command
fi

View File

@ -0,0 +1,8 @@
# Locate the Kokkos source tree relative to this Makefile unless the caller
# already provided KOKKOS_PATH (command line, environment or including file).
ifndef KOKKOS_PATH
  # Absolute path of this Makefile: nothing has been included yet, so it is the
  # last entry of MAKEFILE_LIST.
  MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
  # Parent of the directory holding this Makefile. $(dir ...) strips only the
  # file-name component, whereas substituting the text "Makefile" would also
  # mangle any directory whose name contains that substring.
  KOKKOS_PATH = $(dir $(MAKEFILE_PATH))..
endif

# Core Kokkos build settings, followed by the generators for the header lists
# and the build files.
include $(KOKKOS_PATH)/Makefile.kokkos
include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists
include $(KOKKOS_PATH)/core/src/Makefile.generate_build_files

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,219 @@
# kokkos_generated_settings.cmake includes the kokkos library itself in KOKKOS_LIBS
# which we do not want to use for the cmake builds so clean this up.
# The expansion is quoted so that string() always receives exactly one input
# argument: an empty KOKKOS_LIBS no longer triggers an "incorrect number of
# arguments" error, and a ;-separated list keeps its element boundaries instead
# of having its elements concatenated.
string(REGEX REPLACE "-lkokkos" "" KOKKOS_LIBS "${KOKKOS_LIBS}")
############################ Detect if submodule ###############################
#
# With thanks to StackOverflow:
# http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake
#
# HAS_PARENT receives the PARENT_DIRECTORY directory property; it is used below
# as a boolean to pick the header install sub-directory.
get_directory_property(HAS_PARENT PARENT_DIRECTORY)

if(NOT HAS_PARENT)
  # No parent directory scope: Kokkos is the top-level project.
  message(STATUS "Standalone build")
  set(KOKKOS_HEADER_DIR "include")
else()
  # Pulled in by another project: keep the headers in their own sub-directory.
  message(STATUS "Submodule build")
  set(KOKKOS_HEADER_DIR "include/kokkos")
endif()
################################ Handle the actual build #######################

# User-overridable install locations (cache entries of type PATH).
set(INSTALL_LIB_DIR
  lib
  CACHE PATH "Installation directory for libraries")
set(INSTALL_BIN_DIR
  bin
  CACHE PATH "Installation directory for executables")
set(INSTALL_INCLUDE_DIR
  ${KOKKOS_HEADER_DIR}
  CACHE PATH "Installation directory for header files")

# Default location of the installed CMake package files; native Windows
# (WIN32 but not Cygwin) uses a top-level CMake directory.
if(CYGWIN OR NOT WIN32)
  set(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos)
else()
  set(DEF_INSTALL_CMAKE_DIR CMake)
endif()
set(INSTALL_CMAKE_DIR
  ${DEF_INSTALL_CMAKE_DIR}
  CACHE PATH "Installation directory for CMake files")

# Make relative paths absolute (needed later on): every INSTALL_<kind>_DIR that
# is not already absolute is prefixed with CMAKE_INSTALL_PREFIX.
foreach(p LIB BIN INCLUDE CMAKE)
  set(var INSTALL_${p}_DIR)
  if(NOT IS_ABSOLUTE "${${var}}")
    set(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}")
  endif()
endforeach()
# Collect the include directories of the three Kokkos packages, the build
# directory and whatever KOKKOS_INCLUDE_DIRS already holds.
set(Kokkos_INCLUDE_DIRS
  ${Kokkos_SOURCE_DIR}/core/src
  ${Kokkos_SOURCE_DIR}/containers/src
  ${Kokkos_SOURCE_DIR}/algorithms/src
  ${Kokkos_BINARY_DIR} # to find KokkosCore_config.h
  ${KOKKOS_INCLUDE_DIRS}
)

# Publish the list as Kokkos_INCLUDE_DIRS_RET: locally for a standalone build,
# in the parent scope when Kokkos was added by another project.
if(NOT HAS_PARENT)
  set(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS})
else()
  set(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS} PARENT_SCOPE)
endif()

# Apply the include directories to the targets defined in this directory.
include_directories(${Kokkos_INCLUDE_DIRS})
# Define the library targets: either one library per Kokkos package
# (KOKKOS_SEPARATE_LIBS) or a single combined "kokkos" library.
# Kokkos_LIBRARIES_NAMES receives the names of the targets that were created.
IF(KOKKOS_SEPARATE_LIBS)
  # Sources come from makefile-generated kokkos_generated_settings.cmake file
  # Separate libs need to separate the sources
  set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})

  # kokkoscore
  ADD_LIBRARY(
    kokkoscore
    ${KOKKOS_CORE_SRCS}
  )

  # Kokkos compile flags apply to C++ translation units only and are
  # propagated to consumers of the library.
  target_compile_options(
    kokkoscore
    PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
  )

  # Install the kokkoscore library
  INSTALL (TARGETS kokkoscore
    EXPORT KokkosTargets
    ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
    LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
    RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
  )

  TARGET_LINK_LIBRARIES(
    kokkoscore
    ${KOKKOS_LD_FLAGS}
    ${KOKKOS_EXTRA_LIBS_LIST}
  )

  SET (Kokkos_LIBRARIES_NAMES kokkoscore)

  # kokkoscontainers
  # The target only exists when container sources were provided, so every
  # command that refers to it (link, install, export list) lives inside the
  # same guard; referring to a target that was never created is a configure
  # error.
  if (DEFINED KOKKOS_CONTAINERS_SRCS)
    ADD_LIBRARY(
      kokkoscontainers
      ${KOKKOS_CONTAINERS_SRCS}
    )

    TARGET_LINK_LIBRARIES(
      kokkoscontainers
      kokkoscore
    )

    # Install the kokkoscontainers library
    INSTALL (TARGETS kokkoscontainers
      EXPORT KokkosTargets
      ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
      LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
      RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)

    LIST (APPEND Kokkos_LIBRARIES_NAMES kokkoscontainers)
  endif()

  # kokkosalgorithms - Build as interface library since no source files.
  ADD_LIBRARY(
    kokkosalgorithms
    INTERFACE
  )

  target_include_directories(
    kokkosalgorithms
    INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src
  )

  TARGET_LINK_LIBRARIES(
    kokkosalgorithms
    INTERFACE kokkoscore
  )

  # Install the kokkosalgorithms library
  # NOTE(review): unlike the other targets this one is not added to the
  # KokkosTargets install export set - confirm that this is intended.
  INSTALL (TARGETS kokkosalgorithms
    ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
    LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
    RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)

  LIST (APPEND Kokkos_LIBRARIES_NAMES kokkosalgorithms)
ELSE()
  # kokkos
  ADD_LIBRARY(
    kokkos
    ${KOKKOS_CORE_SRCS}
    ${KOKKOS_CONTAINERS_SRCS}
  )

  # Kokkos compile flags apply to C++ translation units only and are
  # propagated to consumers of the library.
  target_compile_options(
    kokkos
    PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
  )

  TARGET_LINK_LIBRARIES(
    kokkos
    ${KOKKOS_LD_FLAGS}
    ${KOKKOS_EXTRA_LIBS_LIST}
  )

  # Install the kokkos library
  INSTALL (TARGETS kokkos
    EXPORT KokkosTargets
    ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
    LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
    RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)

  SET (Kokkos_LIBRARIES_NAMES kokkos)
endif() # KOKKOS_SEPARATE_LIBS
# Install the kokkos headers
# Each source directory ends in a slash, so its contents (not the directory
# itself) are copied into KOKKOS_HEADER_DIR; FILES_MATCHING restricts the copy
# to *.hpp files.
# install(DIRECTORY) has no EXPORT option - export sets only contain targets -
# so no export name is given here; it would be parsed as one more directory
# to install.
INSTALL (DIRECTORY
  ${Kokkos_SOURCE_DIR}/core/src/
  DESTINATION ${KOKKOS_HEADER_DIR}
  FILES_MATCHING PATTERN "*.hpp"
)

INSTALL (DIRECTORY
  ${Kokkos_SOURCE_DIR}/containers/src/
  DESTINATION ${KOKKOS_HEADER_DIR}
  FILES_MATCHING PATTERN "*.hpp"
)

INSTALL (DIRECTORY
  ${Kokkos_SOURCE_DIR}/algorithms/src/
  DESTINATION ${KOKKOS_HEADER_DIR}
  FILES_MATCHING PATTERN "*.hpp"
)

# The configuration header is read from the build directory.
INSTALL (FILES
  ${Kokkos_BINARY_DIR}/KokkosCore_config.h
  DESTINATION ${KOKKOS_HEADER_DIR}
)
# Build-tree export: write the library targets to KokkosTargets.cmake in the
# build directory.
export(
  TARGETS ${Kokkos_LIBRARIES_NAMES}
  FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake"
)

# Export the package for use from the build-tree
# (this registers the build-tree with a global CMake-registry)
export(PACKAGE Kokkos)

# Generate KokkosConfig.cmake twice from the same template; only
# CONF_INCLUDE_DIRS differs between the two variants.

# Build-tree variant: include directories are the source and build trees.
set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}")
configure_file(
  ${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in
  "${Kokkos_BINARY_DIR}/KokkosConfig.cmake"
  @ONLY
)

# Install-tree variant: the include directory is expressed relative to the
# installed CMake directory. The escaped \${Kokkos_CMAKE_DIR} is written out
# literally and left for the generated file to resolve.
file(RELATIVE_PATH REL_INCLUDE_DIR
  "${INSTALL_CMAKE_DIR}"
  "${INSTALL_INCLUDE_DIR}"
)
set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}")
configure_file(
  ${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in
  "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake"
  @ONLY
)

# Install the install-tree variant of KokkosConfig.cmake
install(
  FILES "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake"
  DESTINATION "${INSTALL_CMAKE_DIR}"
)

#This seems not to do anything?
#message(STATUS "KokkosTargets: " ${KokkosTargets})

# Install the export set for use with the install-tree
install(
  EXPORT KokkosTargets
  DESTINATION "${INSTALL_CMAKE_DIR}"
)

View File

@ -0,0 +1,345 @@
################################### FUNCTIONS ##################################
# List of functions
# set_kokkos_cxx_compiler
# set_kokkos_cxx_standard
# set_kokkos_srcs
#-------------------------------------------------------------------------------
# function(set_kokkos_cxx_compiler)
# Sets the following compiler variables that are analogous to the CMAKE_*
# versions. We add the ability to detect NVCC (really nvcc_wrapper).
#
# Outputs (set in the caller's scope):
#   KOKKOS_CXX_COMPILER
#   KOKKOS_CXX_COMPILER_ID
#   KOKKOS_CXX_COMPILER_VERSION
#
# Inputs:
#   KOKKOS_ENABLE_CUDA
#   CMAKE_CXX_COMPILER
#   CMAKE_CXX_COMPILER_ID
#   CMAKE_CXX_COMPILER_VERSION
#   CMAKE_CXX_EXTENSIONS
#
# Also verifies the compiler version meets the minimum required by Kokkos and
# aborts the configure step with FATAL_ERROR when it does not.
function(set_kokkos_cxx_compiler)
  # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own
  # version of the CMake variables and detect nvcc ourselves. Initially set to
  # the CMake variable values.
  set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER})
  set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
  set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION})

  # Check if the compiler is nvcc (which really means nvcc_wrapper): count the
  # lines of `<compiler> --version` that mention nvcc.
  execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
                  COMMAND grep nvcc
                  COMMAND wc -l
                  OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Drop any leading blanks in front of the count. The input is quoted so that
  # an empty result (e.g. the pipeline could not be run) does not turn into a
  # missing argument.
  string(REGEX REPLACE "^ +" ""
         INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")

  if(INTERNAL_HAVE_COMPILER_NVCC)
    # Set the compiler id to nvcc. We use the value used by CMake 3.8.
    set(INTERNAL_CXX_COMPILER_ID NVIDIA)

    # Set nvcc's compiler version from the "release" line of --version.
    execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
                    COMMAND grep release
                    OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    # A literal dot has to be written as "\\." inside a quoted CMake argument;
    # a single backslash is an invalid escape sequence.
    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
           INTERNAL_CXX_COMPILER_VERSION "${INTERNAL_CXX_COMPILER_VERSION}")
  endif()

  # Enforce the minimum compilers supported by Kokkos.
  set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n")

  if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.8.4)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  endif()

  # Enforce that extensions are turned off for nvcc_wrapper.
  if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
    if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
      message(FATAL_ERROR "NVCC doesn't support C++ extensions. Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.")
    endif()
  endif()

  if(KOKKOS_ENABLE_CUDA)
    # Enforce that the compiler can compile CUDA code.
    if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
      if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0)
        message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.")
      endif()
    elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
      message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.")
    endif()
  endif()

  # Publish the results to the caller.
  set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE)
  set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE)
  set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE)
endfunction()
#-------------------------------------------------------------------------------
# function(set_kokkos_cxx_standard)
# Works out how targets using Kokkos get their C++ standard flags (C++11 or
# above). Where CMake compile features are usable for the compiler, a feature
# list is returned; otherwise an explicit flag is returned. User values of
# CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS are honored.
#
# Outputs (set in the caller's scope; exactly one of them per call):
#   KOKKOS_CXX11_FEATURES
#   KOKKOS_CXX_FLAGS
#
# Inputs:
#   KOKKOS_CXX_COMPILER
#   KOKKOS_CXX_COMPILER_ID
#   KOKKOS_CXX_COMPILER_VERSION
#
function(set_kokkos_cxx_standard)
  # First CMake version supporting CXX_STANDARD and CXX compile features per
  # compiler (from the CMake documentation, the CMake sources, and testing
  # with specific CMake versions):
  #
  #   COMPILER      CXX_STANDARD     Compile Features
  #   ---------------------------------------------------------------
  #   Clang         3.1              3.1
  #   GNU           3.1              3.2
  #   AppleClang    3.2              3.2
  #   Intel         3.6              3.6
  #   Cray          No               No
  #   PGI           No               No
  #   XL            No               No
  #
  # For CUDA builds through nvcc_wrapper the host compiler's C++11 flag is
  # used. CMake identifies the host compiler when nvcc_wrapper is called, so
  # this just works. NVCC and nvcc_wrapper only recognize '-std=c++11', which
  # restricts CUDA builds to host compilers using that flag and rules out
  # extensions (gnu++11).

  # Can compile features be used for this compiler / CMake combination?
  if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
    if((CMAKE_CXX_COMPILER_ID STREQUAL Clang)
       AND (NOT CMAKE_VERSION VERSION_LESS 3.1))
      set(INTERNAL_USE_COMPILE_FEATURES ON)
    elseif((CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU)
           AND (NOT CMAKE_VERSION VERSION_LESS 3.2))
      set(INTERNAL_USE_COMPILE_FEATURES ON)
    elseif((CMAKE_CXX_COMPILER_ID STREQUAL Intel)
           AND (NOT CMAKE_VERSION VERSION_LESS 3.6))
      set(INTERNAL_USE_COMPILE_FEATURES ON)
    endif()
  endif()

  if(INTERNAL_USE_COMPILE_FEATURES)
    # Let CMake propagate the C++ flags to user code through a compile
    # feature. cxx_nonstatic_member_init is a stand-in chosen because it
    # forces GCC 4.7 or later and Intel 14.0 or later; Clang 3.3 and later
    # support all of C++11. With CMake 3.8 and higher this could become
    # cxx_std_11.
    set(KOKKOS_CXX11_FEATURES cxx_nonstatic_member_init PARENT_SCOPE)
    return()
  endif()

  # From here on: compile features are not available for this compiler and
  # CMake version, so explicit flags are selected per compiler.
  if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
    # CMake before 3.2 has no CXX_STANDARD / compile-feature support for
    # AppleClang. Adapted from Modules/Compiler/AppleClang-CXX.cmake of
    # CMake 3.7.2.
    if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11")
    endif()

    if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1)
      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14")
      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14")
    elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
      # AppleClang 5.0 knows this flag, but does not set a __cplusplus macro
      # greater than 201103L.
      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y")
      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y")
    endif()

  elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
    # CMake before 3.6 has no CXX_STANDARD / compile-feature support for
    # Intel. Adapted from Modules/Compiler/Intel-CXX.cmake of CMake 3.7.2.
    if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
      set(std_switch -Qstd)
      set(ext_dialect c++)
    else()
      set(std_switch -std)
      set(ext_dialect gnu++)
    endif()

    # C++11 flags.
    if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0)
      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${std_switch}=c++11")
      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${std_switch}=${ext_dialect}11")
    elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1)
      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${std_switch}=c++0x")
      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${std_switch}=${ext_dialect}0x")
    endif()

    # C++14 flags.
    # TODO: There is no gnu++14 value supported; the extension variant uses
    # the standard flag until that is figured out.
    if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${std_switch}=c++14")
      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${std_switch}=c++14")
    elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${std_switch}=c++1y")
      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${std_switch}=c++1y")
    endif()

  elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray)
    # No CXX_STANDARD / compile-feature support in CMake for Cray. All four
    # variants map to the same C++11 flag.
    set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11")
    set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11")
    set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11")
    set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11")

  elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI)
    # No CXX_STANDARD / compile-feature support in CMake for PGI. All four
    # variants map to the same C++11 flag.
    set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11")
    set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11")
    set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11")
    set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11")

  elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL)
    # No CXX_STANDARD / compile-feature support in CMake for XL. All four
    # variants map to the same C++11 flag.
    set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
    set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11")
    set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11")
    set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11")

  else()
    # Assume GNU. CMake 3.1 and above handles CMAKE_CXX_STANDARD for this
    # compiler, so flags are only supplied when the user requested no
    # standard; in that case C++11 is required transitively.
    if(NOT CMAKE_CXX_STANDARD)
      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION})
      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION})
    endif()
  endif()

  # Pick the flag. C++14 is used only when requested explicitly; a request for
  # C++11 and "no explicit request" both resolve to the C++11 flags.
  if(CMAKE_CXX_STANDARD EQUAL 14)
    set(std_level 14)
  else()
    set(std_level 11)
  endif()

  # Extensions are used only when explicitly switched ON.
  if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
    set(flag_kind EXTENSION)
  else()
    set(flag_kind STANDARD)
  endif()

  set(KOKKOS_CXX_FLAGS
      ${INTERNAL_CXX${std_level}_${flag_kind}_COMPILE_OPTION}
      PARENT_SCOPE)
endfunction()
#-------------------------------------------------------------------------------
# function(set_kokkos_srcs)
# Takes a list of sources for kokkos (e.g., KOKKOS_SRC from Makefile.kokkos and
# put it into kokkos_generated_settings.cmake) and sorts the files into the
# subpackages or separate_libraries. This is core and containers (algorithms
# is pure header files).
#
# Usage:
#   set_kokkos_srcs(KOKKOS_SRC <file>...)
#
# A file belongs to core when the first directory of its path, taken relative
# to CMAKE_CURRENT_SOURCE_DIR, is "core"; every other file is assigned to
# containers.
#
# Inputs:
#   KOKKOS_SRC
#
# Outputs (set in the caller's scope; files are appended to whatever the
# caller's lists already hold):
#   KOKKOS_CORE_SRCS
#   KOKKOS_CONTAINERS_SRCS
#
function(set_kokkos_srcs)
  set(opts )                   # no-value args
  set(oneValArgs )
  set(multValArgs KOKKOS_SRC)  # e.g., lists
  cmake_parse_arguments(IN "${opts}" "${oneValArgs}" "${multValArgs}" ${ARGN})

  foreach(sfile ${IN_KOKKOS_SRC})
    # Make the path relative to the current source dir, then look at its
    # first component.
    string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" stripfile "${sfile}")
    string(REPLACE "/" ";" striplist "${stripfile}")
    list(GET striplist 0 firstdir)
    # Quoted: the first component is empty for a path that still starts with
    # "/" (a file outside the current source dir), and an unquoted empty
    # expansion would leave if() without its left operand.
    if("${firstdir}" STREQUAL "core")
      list(APPEND KOKKOS_CORE_SRCS ${sfile})
    else()
      list(APPEND KOKKOS_CONTAINERS_SRCS ${sfile})
    endif()
  endforeach()

  set(KOKKOS_CORE_SRCS ${KOKKOS_CORE_SRCS} PARENT_SCOPE)
  set(KOKKOS_CONTAINERS_SRCS ${KOKKOS_CONTAINERS_SRCS} PARENT_SCOPE)
endfunction()
# macro(set_kokkos_default_default VARIABLE DEFAULT)
# Fills KOKKOS_INTERNAL_ENABLE_<VARIABLE>_DEFAULT unless it already holds a
# non-empty value: a non-empty KOKKOS_ENABLE_<VARIABLE> wins, otherwise
# DEFAULT is used. In every case the KOKKOS_ENABLE_<VARIABLE> cache entry is
# removed afterwards.
macro(set_kokkos_default_default VARIABLE DEFAULT)
  if("${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}" STREQUAL "")
    if(NOT "${KOKKOS_ENABLE_${VARIABLE}}" STREQUAL "")
      # Carry over the value that is currently set for the option.
      set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${KOKKOS_ENABLE_${VARIABLE}})
    else()
      # Nothing set anywhere: fall back to the supplied default.
      set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${DEFAULT})
    endif()
  endif()
  unset(KOKKOS_ENABLE_${VARIABLE} CACHE)
endmacro()

View File

@ -0,0 +1,365 @@
########################## NOTES ###############################################
# List the options for configuring kokkos using CMake method of doing it.
# These options then get mapped onto KOKKOS_SETTINGS environment variable by
# kokkos_settings.cmake. It is separate to allow other packages to override
# these variables (e.g., TriBITS).
########################## AVAILABLE OPTIONS ###################################
# Use lists for documentation, verification, and programming convenience

# All CMake options of the type KOKKOS_ENABLE_*
# Each entry is the CamelCase spelling accepted as Kokkos_ENABLE_<entry>; its
# upper-cased form has to match the corresponding KOKKOS_ENABLE_<ENTRY>
# option name exactly.
set(KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST)
list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
  Serial
  OpenMP
  Pthread
  Qthreads
  Cuda
  ROCm
  HWLOC
  MEMKIND
  LIBRT
  Cuda_Lambda
  Cuda_Relocatable_Device_Code
  Cuda_UVM
  Cuda_LDG_Intrinsic
  Debug
  Debug_DualView_Modify_Check
  Debug_Bounds_Check
  Compiler_Warnings
  Profiling
  Profiling_Load_Print
  Aggressive_Vectorization
)
#-------------------------------------------------------------------------------
#------------------------------- Recognize CamelCase Options ---------------------------
#-------------------------------------------------------------------------------
# For every known option, reconcile the CamelCase spelling
# (Kokkos_ENABLE_<opt>) with the upper-case spelling (KOKKOS_ENABLE_<OPT>).
foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
  string(TOUPPER ${opt} OPT )
  if(DEFINED Kokkos_ENABLE_${opt})
    if(NOT DEFINED KOKKOS_ENABLE_${OPT})
      # Only the CamelCase spelling was given: it becomes the default.
      set(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}})
    elseif(NOT ("${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}"))
      # Both spellings are defined and disagree.
      if(NOT DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL)
        # No recorded previous value to arbitrate with.
        message(FATAL_ERROR "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ!")
      else()
        message(WARNING "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ! Could be caused by old CMakeCache Variable. Run CMake again and warning should disappear. If not you are truly setting both variables.")
        if("${Kokkos_ENABLE_${opt}}" STREQUAL "${KOKKOS_ENABLE_${OPT}_INTERNAL}")
          # CamelCase still equals the recorded value, so the upper-case
          # spelling is the one that changed: follow it.
          set(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}})
        else()
          # CamelCase differs from the recorded value: it takes over.
          unset(KOKKOS_ENABLE_${OPT} CACHE)
          set(KOKKOS_ENABLE_${OPT} ${Kokkos_ENABLE_${opt}})
          message(WARNING "SET BOTH VARIABLES KOKKOS_ENABLE_${OPT}: ${KOKKOS_ENABLE_${OPT}}")
        endif()
      endif()
    endif()
  endif()
endforeach()
# Accept Kokkos_Arch as an alternative spelling of KOKKOS_ARCH. Supplying both
# with different values is an error.
if(DEFINED Kokkos_Arch)
  if(DEFINED KOKKOS_ARCH)
    # Both sides are quoted: KOKKOS_ARCH may be empty or hold several
    # ';'-separated architectures, and an unquoted expansion would then hand
    # if() the wrong number of arguments.
    if(NOT ("${KOKKOS_ARCH}" STREQUAL "${Kokkos_Arch}"))
      message(FATAL_ERROR "Defined both Kokkos_Arch and KOKKOS_ARCH and they differ!")
    endif()
  else()
    # NOTE(review): this sets a normal variable, while KOKKOS_ARCH is declared
    # as a cache entry further down in this file -- confirm the value set here
    # survives that declaration on a first configure.
    set(KOKKOS_ARCH ${Kokkos_Arch})
  endif()
endif()
#-------------------------------------------------------------------------------
# List of accepted architecture names (host CPUs and GPUs).
#-------------------------------------------------------------------------------
set(KOKKOS_ARCH_LIST)

# Generic, AMD and ARM hosts
list(APPEND KOKKOS_ARCH_LIST
  None            # No architecture optimization
  AMDAVX          # (HOST) AMD chip
  ARMv80          # (HOST) ARMv8.0 Compatible CPU
  ARMv81          # (HOST) ARMv8.1 Compatible CPU
  ARMv8-ThunderX  # (HOST) ARMv8 Cavium ThunderX CPU
)

# Intel hosts
list(APPEND KOKKOS_ARCH_LIST
  WSM             # (HOST) Intel Westmere CPU
  SNB             # (HOST) Intel Sandy/Ivy Bridge CPUs
  HSW             # (HOST) Intel Haswell CPUs
  BDW             # (HOST) Intel Broadwell Xeon E-class CPUs
  SKX             # (HOST) Intel Sky Lake Xeon E-class HPC CPUs (AVX512)
  KNC             # (HOST) Intel Knights Corner Xeon Phi
  KNL             # (HOST) Intel Knights Landing Xeon Phi
)

# IBM hosts
list(APPEND KOKKOS_ARCH_LIST
  BGQ             # (HOST) IBM Blue Gene Q
  Power7          # (HOST) IBM POWER7 CPUs
  Power8          # (HOST) IBM POWER8 CPUs
  Power9          # (HOST) IBM POWER9 CPUs
)

# NVIDIA GPUs
list(APPEND KOKKOS_ARCH_LIST
  Kepler          # (GPU) NVIDIA Kepler default (generation CC 3.5)
  Kepler30        # (GPU) NVIDIA Kepler generation CC 3.0
  Kepler32        # (GPU) NVIDIA Kepler generation CC 3.2
  Kepler35        # (GPU) NVIDIA Kepler generation CC 3.5
  Kepler37        # (GPU) NVIDIA Kepler generation CC 3.7
  Maxwell         # (GPU) NVIDIA Maxwell default (generation CC 5.0)
  Maxwell50       # (GPU) NVIDIA Maxwell generation CC 5.0
  Maxwell52       # (GPU) NVIDIA Maxwell generation CC 5.2
  Maxwell53       # (GPU) NVIDIA Maxwell generation CC 5.3
  Pascal60        # (GPU) NVIDIA Pascal generation CC 6.0
  Pascal61        # (GPU) NVIDIA Pascal generation CC 6.1
)
# List of possible devices (execution backends).
# The case and spelling here needs to match Makefile.kokkos
# (Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial).
set(KOKKOS_DEVICES_LIST)
list(APPEND KOKKOS_DEVICES_LIST
  Cuda      # NVIDIA GPU -- see below
  OpenMP    # OpenMP
  Pthread   # pthread
  Qthreads  # qthreads
  Serial    # serial
  ROCm      # ROCm
)

# List of possible TPLs for Kokkos.
# From Makefile.kokkos: Options: hwloc,librt,experimental_memkind
set(KOKKOS_USE_TPLS_LIST)
list(APPEND KOKKOS_USE_TPLS_LIST
  HWLOC    # hwloc
  LIBRT    # librt
  MEMKIND  # experimental_memkind
)

# CMake TPL name -> Makefile.kokkos TPL name
set(KOKKOS_INTERNAL_HWLOC   hwloc)
set(KOKKOS_INTERNAL_LIBRT   librt)
set(KOKKOS_INTERNAL_MEMKIND experimental_memkind)

# List of possible advanced options.
set(KOKKOS_OPTIONS_LIST)
list(APPEND KOKKOS_OPTIONS_LIST
  AGGRESSIVE_VECTORIZATION
  DISABLE_PROFILING
  DISABLE_DUALVIEW_MODIFY_CHECK
  ENABLE_PROFILE_LOAD_PRINT
)
#-------------------------------------------------------------------------------
# List of possible Options for CUDA
#-------------------------------------------------------------------------------
# From Makefile.kokkos: Options: use_ldg,force_uvm,rdc
set(KOKKOS_CUDA_OPTIONS_LIST)
list(APPEND KOKKOS_CUDA_OPTIONS_LIST
  LDG_INTRINSIC            # use_ldg
  UVM                      # force_uvm
  RELOCATABLE_DEVICE_CODE  # rdc
  LAMBDA                   # enable_lambda
)

# Map of cmake variables to Makefile variables. This is the only place these
# names are mapped, so each KOKKOS_INTERNAL_<option> has a single value.
set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg)
set(KOKKOS_INTERNAL_UVM force_uvm)
set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc)
set(KOKKOS_INTERNAL_LAMBDA enable_lambda)
#-------------------------------------------------------------------------------
#------------------------------- Create doc strings ----------------------------
#-------------------------------------------------------------------------------
# Turn the architecture list into one entry per line for use in the
# KOKKOS_ARCH help text.
set(tmpr "\n ")
string(REPLACE ";" "${tmpr}" KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}")
# No doc strings are built for the devices, TPLs and CUDA options lists: those
# are exposed through individual Foo_ENABLE style options instead.

#-------------------------------------------------------------------------------
#------------------------------- GENERAL OPTIONS -------------------------------
#-------------------------------------------------------------------------------
# Setting this variable to a value other than "None" can improve host
# performance by turning on architecture specific code.
# The "NOT_SET" sentinel is used to determine if the option was passed in; it
# is replaced by the default "None" further down.
set(KOKKOS_ARCH "NOT_SET"
    CACHE STRING
    "Optimize for specific host architecture. Options are: ${KOKKOS_INTERNAL_ARCH_DOCSTR}")

# Whether to build separate libraries or not.
set(KOKKOS_SEPARATE_LIBS OFF
    CACHE BOOL
    "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.")

# Qthreads options.
set(KOKKOS_QTHREADS_DIR ""
    CACHE PATH
    "Location of Qthreads library.")
#-------------------------------------------------------------------------------
#------------------------------- KOKKOS_DEVICES --------------------------------
#-------------------------------------------------------------------------------
# Figure out default settings. The name handed to set_kokkos_default_default
# must be the <NAME> of the KOKKOS_ENABLE_<NAME> option declared below, since
# that declaration reads KOKKOS_INTERNAL_ENABLE_<NAME>_DEFAULT.
if(Trilinos_ENABLE_Kokkos)
  # Inside Trilinos the defaults follow the Trilinos / TPL enables.
  set_kokkos_default_default(SERIAL ON)
  set_kokkos_default_default(PTHREAD OFF)
  if(TPL_ENABLE_QTHREAD)
    set_kokkos_default_default(QTHREADS ${TPL_ENABLE_QTHREAD})
  else()
    set_kokkos_default_default(QTHREADS OFF)
  endif()
  if(Trilinos_ENABLE_OpenMP)
    set_kokkos_default_default(OPENMP ${Trilinos_ENABLE_OpenMP})
  else()
    set_kokkos_default_default(OPENMP OFF)
  endif()
  if(TPL_ENABLE_CUDA)
    set_kokkos_default_default(CUDA ${TPL_ENABLE_CUDA})
  else()
    set_kokkos_default_default(CUDA OFF)
  endif()
  set_kokkos_default_default(ROCM OFF)
else()
  # Standalone build: only Serial is on by default.
  set_kokkos_default_default(SERIAL ON)
  set_kokkos_default_default(OPENMP OFF)
  set_kokkos_default_default(PTHREAD OFF)
  set_kokkos_default_default(QTHREADS OFF)
  set_kokkos_default_default(CUDA OFF)
  set_kokkos_default_default(ROCM OFF)
endif()

# Set which Kokkos backend to use.
# These are the actual options that define the settings. No FORCE is needed:
# set_kokkos_default_default has just removed each of these cache entries, so
# every declaration below creates its entry afresh.
set(KOKKOS_ENABLE_SERIAL ${KOKKOS_INTERNAL_ENABLE_SERIAL_DEFAULT} CACHE BOOL "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads.")
set(KOKKOS_ENABLE_OPENMP ${KOKKOS_INTERNAL_ENABLE_OPENMP_DEFAULT} CACHE BOOL "Enable OpenMP support in Kokkos.")
set(KOKKOS_ENABLE_PTHREAD ${KOKKOS_INTERNAL_ENABLE_PTHREAD_DEFAULT} CACHE BOOL "Enable Pthread support in Kokkos.")
set(KOKKOS_ENABLE_QTHREADS ${KOKKOS_INTERNAL_ENABLE_QTHREADS_DEFAULT} CACHE BOOL "Enable Qthreads support in Kokkos.")
set(KOKKOS_ENABLE_CUDA ${KOKKOS_INTERNAL_ENABLE_CUDA_DEFAULT} CACHE BOOL "Enable CUDA support in Kokkos.")
set(KOKKOS_ENABLE_ROCM ${KOKKOS_INTERNAL_ENABLE_ROCM_DEFAULT} CACHE BOOL "Enable ROCm support in Kokkos.")
#-------------------------------------------------------------------------------
#------------------------------- KOKKOS DEBUG and PROFILING --------------------
#-------------------------------------------------------------------------------
# Each option below is declared in two steps: set_kokkos_default_default
# settles the default, then the cache entry is created from it.
#
# From Makefile.kokkos: Advanced Options:
# compiler_warnings, aggressive_vectorization, disable_profiling,
# disable_dualview_modify_check, enable_profile_load_print

# Debug build of Kokkos (off by default).
set_kokkos_default_default(DEBUG OFF)
set(KOKKOS_ENABLE_DEBUG
    ${KOKKOS_INTERNAL_ENABLE_DEBUG_DEFAULT}
    CACHE BOOL "Enable Kokkos Debug.")

# Compiler warnings (off by default).
set_kokkos_default_default(COMPILER_WARNINGS OFF)
set(KOKKOS_ENABLE_COMPILER_WARNINGS
    ${KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS_DEFAULT}
    CACHE BOOL "Enable compiler warnings.")

# DualView modify check (off by default).
set_kokkos_default_default(DEBUG_DUALVIEW_MODIFY_CHECK OFF)
set(KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
    ${KOKKOS_INTERNAL_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK_DEFAULT}
    CACHE BOOL "Enable dualview modify check.")

# Aggressive vectorization (off by default).
set_kokkos_default_default(AGGRESSIVE_VECTORIZATION OFF)
set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION
    ${KOKKOS_INTERNAL_ENABLE_AGGRESSIVE_VECTORIZATION_DEFAULT}
    CACHE BOOL "Enable aggressive vectorization.")

# Profiling (on by default).
set_kokkos_default_default(PROFILING ON)
set(KOKKOS_ENABLE_PROFILING
    ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT}
    CACHE BOOL "Enable profiling.")

# Profile load print (off by default).
set_kokkos_default_default(PROFILING_LOAD_PRINT OFF)
set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT
    ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT}
    CACHE BOOL "Enable profile load print.")
#-------------------------------------------------------------------------------
#------------------------------- KOKKOS_USE_TPLS -------------------------------
#-------------------------------------------------------------------------------
# hwloc: on by default only inside Trilinos with the hwloc TPL enabled.
if(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HWLOC)
  set_kokkos_default_default(HWLOC ON)
else()
  set_kokkos_default_default(HWLOC OFF)
endif()
set(KOKKOS_ENABLE_HWLOC
    ${KOKKOS_INTERNAL_ENABLE_HWLOC_DEFAULT}
    CACHE BOOL "Enable hwloc for better process placement.")
set(KOKKOS_HWLOC_DIR ""
    CACHE PATH "Location of hwloc library. (kokkos tpl)")

# memkind: off by default.
set_kokkos_default_default(MEMKIND OFF)
set(KOKKOS_ENABLE_MEMKIND
    ${KOKKOS_INTERNAL_ENABLE_MEMKIND_DEFAULT}
    CACHE BOOL "Enable memkind. (kokkos tpl)")
set(KOKKOS_MEMKIND_DIR ""
    CACHE PATH "Location of memkind library. (kokkos tpl)")

# librt: on by default for standalone builds; inside Trilinos it follows
# TPL_ENABLE_LIBRT when that is defined and is off otherwise.
if(NOT Trilinos_ENABLE_Kokkos)
  set_kokkos_default_default(LIBRT ON)
elseif(DEFINED TPL_ENABLE_LIBRT)
  set_kokkos_default_default(LIBRT ${TPL_ENABLE_LIBRT})
else()
  set_kokkos_default_default(LIBRT OFF)
endif()
set(KOKKOS_ENABLE_LIBRT
    ${KOKKOS_INTERNAL_ENABLE_LIBRT_DEFAULT}
    CACHE BOOL "Enable librt for more precise timer. (kokkos tpl)")
#-------------------------------------------------------------------------------
#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
#-------------------------------------------------------------------------------
# CUDA options.
# Set Defaults. The macro itself adds the KOKKOS_INTERNAL_ENABLE_ prefix and
# the _DEFAULT suffix, so only the bare option name is passed; the resulting
# variables are exactly the ones read by the cache declarations below.
set_kokkos_default_default(CUDA_LDG_INTRINSIC OFF)
set_kokkos_default_default(CUDA_UVM OFF)
set_kokkos_default_default(CUDA_RELOCATABLE_DEVICE_CODE OFF)
if(Trilinos_ENABLE_Kokkos)
  if(KOKKOS_ENABLE_CUDA)
    find_package(CUDA)
  endif()
  # Inside Trilinos, lambdas default to ON for CUDA versions above 7.0. When
  # no CUDA version is known the default is OFF, so that a default is always
  # established.
  if(DEFINED CUDA_VERSION AND CUDA_VERSION VERSION_GREATER "7.0")
    set_kokkos_default_default(CUDA_LAMBDA ON)
  else()
    set_kokkos_default_default(CUDA_LAMBDA OFF)
  endif()
else()
  set_kokkos_default_default(CUDA_LAMBDA OFF)
endif()

# Set actual options
set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.")
set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ${KOKKOS_INTERNAL_ENABLE_CUDA_LDG_INTRINSIC_DEFAULT} CACHE BOOL "Enable CUDA LDG. (cuda option)")
set(KOKKOS_ENABLE_CUDA_UVM ${KOKKOS_INTERNAL_ENABLE_CUDA_UVM_DEFAULT} CACHE BOOL "Enable CUDA unified virtual memory.")
set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE_DEFAULT} CACHE BOOL "Enable relocatable device code for CUDA. (cuda option)")
set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. (cuda option)")
#-------------------------------------------------------------------------------
#----------------------- HOST ARCH AND LEGACY TRIBITS --------------------------
#-------------------------------------------------------------------------------
# KOKKOS_LEGACY_TRIBITS selects the previous (legacy) TriBITS build. It is
# switched on only when KOKKOS_ARCH was left at its "NOT_SET" sentinel and
# Kokkos is being built within Trilinos.
set(KOKKOS_LEGACY_TRIBITS False)
if("${KOKKOS_ARCH}" STREQUAL "NOT_SET")
  # No architecture was requested: use the "None" default from here on.
  set(KOKKOS_ARCH "None")
  if(KOKKOS_HAS_TRILINOS)
    set(KOKKOS_LEGACY_TRIBITS True)
  endif()
endif()

# Report the decision; only relevant for Trilinos builds.
if(KOKKOS_HAS_TRILINOS AND KOKKOS_LEGACY_TRIBITS)
  message(STATUS "Using the legacy tribits build because KOKKOS_ARCH not set")
elseif(KOKKOS_HAS_TRILINOS)
  message(STATUS "NOT using the legacy tribits build because KOKKOS_ARCH *is* set")
endif()
#-------------------------------------------------------------------------------
#----------------------- Set CamelCase Options if they are not yet set ---------
#-------------------------------------------------------------------------------
# For every known option: record the value of KOKKOS_ENABLE_<OPT> in the
# cache as KOKKOS_ENABLE_<OPT>_INTERNAL, and mirror a defined
# KOKKOS_ENABLE_<OPT> into the CamelCase cache entry Kokkos_ENABLE_<opt>.
foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
  string(TOUPPER ${opt} OPT )

  # Replace the recorded value with the current one.
  unset(KOKKOS_ENABLE_${OPT}_INTERNAL CACHE)
  set(KOKKOS_ENABLE_${OPT}_INTERNAL
      ${KOKKOS_ENABLE_${OPT}}
      CACHE BOOL "INTERNAL")

  if(DEFINED KOKKOS_ENABLE_${OPT})
    # Re-create the CamelCase entry so that it tracks the upper-case option.
    unset(Kokkos_ENABLE_${opt} CACHE)
    set(Kokkos_ENABLE_${opt}
        ${KOKKOS_ENABLE_${OPT}}
        CACHE BOOL "CamelCase Compatibility setting for KOKKOS_ENABLE_${OPT}")
  endif()
endforeach()

View File

@ -0,0 +1,257 @@
########################## NOTES ###############################################
# This file's goal is to take CMake options found in kokkos_options.cmake but
# possibly set from elsewhere
# (see: trilinos/cmake/ProjectCompilerPostConfig.cmake)
# using CMake idioms and map them onto the KOKKOS_SETTINGS variables that gets
# passed to the kokkos makefile configuration:
# make -f ${CMAKE_SOURCE_DIR}/core/src/Makefile ${KOKKOS_SETTINGS} build-makefile-cmake-kokkos
# that generates KokkosCore_config.h and kokkos_generated_settings.cmake
# To understand how to form KOKKOS_SETTINGS, see
# <KOKKOS_PATH>/Makefile.kokkos
#-------------------------------------------------------------------------------
#------------------------------- GENERAL OPTIONS -------------------------------
#-------------------------------------------------------------------------------
# Every requested architecture has to be one of the known names.
foreach(arch ${KOKKOS_ARCH})
  list(FIND KOKKOS_ARCH_LIST ${arch} indx)
  if(indx LESS 0)
    message(FATAL_ERROR "${arch} is not an accepted value for KOKKOS_ARCH."
                        " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}")
  endif()
endforeach()

# KOKKOS_SETTINGS uses KOKKOS_ARCH as a comma-separated string.
string(REPLACE ";" "," KOKKOS_ARCH "${KOKKOS_ARCH}")
set(KOKKOS_ARCH ${KOKKOS_ARCH})

# From Makefile.kokkos: Options: yes,no
set(KOKKOS_DEBUG no)
if(${KOKKOS_ENABLE_DEBUG})
  set(KOKKOS_DEBUG yes)
endif()
#------------------------------- KOKKOS_DEVICES --------------------------------
# Several devices can be enabled at once: collect the name of every device
# whose KOKKOS_ENABLE_<DEVICE> option is on.
set(KOKKOS_DEVICESl)
foreach(devopt ${KOKKOS_DEVICES_LIST})
  string(TOUPPER ${devopt} devoptuc)
  if(${KOKKOS_ENABLE_${devoptuc}})
    list(APPEND KOKKOS_DEVICESl ${devopt})
  endif()
endforeach()

# The Makefile expects a comma-delimited list.
string(REPLACE ";" "," KOKKOS_DEVICES "${KOKKOS_DEVICESl}")
#------------------------------- KOKKOS_OPTIONS --------------------------------
# From Makefile.kokkos: Options:
# compiler_warnings, aggressive_vectorization, disable_profiling,
# disable_dualview_modify_check, enable_profile_load_print
#
# The conditions name the variables instead of expanding them, so an option
# that is empty or not defined is simply treated as off.
set(KOKKOS_OPTIONSl)
if(KOKKOS_ENABLE_COMPILER_WARNINGS)
  list(APPEND KOKKOS_OPTIONSl compiler_warnings)
endif()
if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
  list(APPEND KOKKOS_OPTIONSl aggressive_vectorization)
endif()
# Profiling and the dualview modify check are expressed to the Makefile as
# "disable_*" options, hence the negated tests.
if(NOT KOKKOS_ENABLE_PROFILING)
  list(APPEND KOKKOS_OPTIONSl disable_profiling)
endif()
if(NOT KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK)
  list(APPEND KOKKOS_OPTIONSl disable_dualview_modify_check)
endif()
if(KOKKOS_ENABLE_PROFILING_LOAD_PRINT)
  list(APPEND KOKKOS_OPTIONSl enable_profile_load_print)
endif()

# List needs to be comma-delimited
string(REPLACE ";" "," KOKKOS_OPTIONS "${KOKKOS_OPTIONSl}")
#------------------------------- KOKKOS_USE_TPLS -------------------------------
# Translate every enabled TPL into its Makefile name through the
# KOKKOS_INTERNAL_<TPL> mapping.
set(KOKKOS_USE_TPLSl)
foreach(tplopt ${KOKKOS_USE_TPLS_LIST})
  if(${KOKKOS_ENABLE_${tplopt}})
    list(APPEND KOKKOS_USE_TPLSl ${KOKKOS_INTERNAL_${tplopt}})
  endif()
endforeach()

# The Makefile expects a comma-delimited list.
string(REPLACE ";" "," KOKKOS_USE_TPLS "${KOKKOS_USE_TPLSl}")
#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
# Construct the Makefile options.
# The working list is the one that has to start out empty: it is appended to
# below, and a leftover value would be carried into KOKKOS_CUDA_OPTIONS.
set(KOKKOS_CUDA_OPTIONSl)
foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST})
  if (${KOKKOS_ENABLE_CUDA_${cudaopt}})
    list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}})
  endif ()
endforeach()

# List needs to be comma-delimited
string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
#------------------------------- PATH VARIABLES --------------------------------
# Want makefile to use same executables specified which means modifying
# the path so the $(shell ...) commands in the makefile see the right exec
# Also, the Makefile's use FOO_PATH naming scheme for -I/-L construction
#TODO: Makefile.kokkos allows this to be overwritten? ROCM_HCC_PATH
#
# KOKKOS_INTERNAL_PATHS is built as a list with one FOO_PATH=<dir> entry per
# element, so that each entry is later passed on as its own argument.
set(KOKKOS_INTERNAL_PATHS)
set(addpathl)
# The names are passed to foreach as separate items; a single quoted
# "a;b;c" argument would be visited once as a whole.
foreach(kvar CUDA QTHREADS ${KOKKOS_USE_TPLS_LIST})
  if(KOKKOS_ENABLE_${kvar})
    # The *_DIR cache entries default to the empty string; an empty value
    # means "no location given" and must not produce a FOO_PATH= entry.
    if(NOT "${KOKKOS_${kvar}_DIR}" STREQUAL "")
      list(APPEND KOKKOS_INTERNAL_PATHS "${kvar}_PATH=${KOKKOS_${kvar}_DIR}")
      if(IS_DIRECTORY "${KOKKOS_${kvar}_DIR}/bin")
        list(APPEND addpathl "${KOKKOS_${kvar}_DIR}/bin")
      endif()
    endif()
  endif()
endforeach()

# Path env is : delimited
string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}")
######################### SET KOKKOS_SETTINGS ##################################
# KOKKOS_SETTINGS is the primary channel to the makefile configuration (see
# Makefile.kokkos). It is assembled as a CMake list of NAME=VALUE entries and
# finally prefixed with "env".
set(KOKKOS_SETTINGS
    KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}
    KOKKOS_PATH=${KOKKOS_PATH}
    KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX})

# Entries of the form KOKKOS_foo=$KOKKOS_foo; skipped when the variable is
# undefined or empty.
foreach(suffix ARCH DEVICES DEBUG OPTIONS CUDA_OPTIONS USE_TPLS)
  set(KOKKOS_VAR KOKKOS_${suffix})
  if(DEFINED ${KOKKOS_VAR} AND NOT "${${KOKKOS_VAR}}" STREQUAL "")
    list(APPEND KOKKOS_SETTINGS ${KOKKOS_VAR}=${${KOKKOS_VAR}})
  endif()
endforeach()

# Entries of the form VAR=VAL for plain (non-KOKKOS_) variables.
#TODO: Makefile supports MPICH_CXX, OMPI_CXX as well
foreach(plain_var CXX CXXFLAGS LDFLAGS)
  if(DEFINED ${plain_var} AND NOT "${${plain_var}}" STREQUAL "")
    list(APPEND KOKKOS_SETTINGS ${plain_var}=${${plain_var}})
  endif()
endforeach()

# Finally, the component paths and the PATH extension
if(NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "")
  list(APPEND KOKKOS_SETTINGS ${KOKKOS_INTERNAL_PATHS})
endif()
if(NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
  # \${PATH} is escaped so that the literal text ${PATH} ends up in the entry.
  list(APPEND KOKKOS_SETTINGS PATH=${KOKKOS_INTERNAL_ADDTOPATH}:\${PATH})
endif()

# Final form that gets passed to make: "env" goes in front of all entries
list(INSERT KOKKOS_SETTINGS 0 env)
############################ PRINT CONFIGURE STATUS ############################
# When KOKKOS_CMAKE_VERBOSE is true, print a summary of the selected execution
# spaces, the architecture string, every enabled option, the configured
# component directories and the final KOKKOS_SETTINGS value.
if(KOKKOS_CMAKE_VERBOSE)
  message(STATUS "")
  message(STATUS "****************** Kokkos Settings ******************")

  # ---- Execution spaces ----
  message(STATUS "Execution Spaces")
  if(NOT KOKKOS_ENABLE_CUDA)
    message(STATUS " Device Parallel: None")
  else()
    message(STATUS " Device Parallel: Cuda")
  endif()
  # Only one host-parallel backend is reported; OpenMP wins over Pthread,
  # which wins over Qthreads.
  if(KOKKOS_ENABLE_OPENMP)
    message(STATUS " Host Parallel: OpenMP")
  elseif(KOKKOS_ENABLE_PTHREAD)
    message(STATUS " Host Parallel: Pthread")
  elseif(KOKKOS_ENABLE_QTHREADS)
    message(STATUS " Host Parallel: Qthreads")
  else()
    message(STATUS " Host Parallel: None")
  endif()
  if(NOT KOKKOS_ENABLE_SERIAL)
    message(STATUS " Host Serial: None")
  else()
    message(STATUS " Host Serial: Serial")
  endif()

  # ---- Architectures ----
  message(STATUS "")
  message(STATUS "Architectures:")
  message(STATUS " ${KOKKOS_ARCH}")

  # ---- Enabled options: each switch that evaluates true is listed by name ----
  message(STATUS "")
  message(STATUS "Enabled options")
  foreach(switch_name
          KOKKOS_SEPARATE_LIBS
          KOKKOS_ENABLE_HWLOC
          KOKKOS_ENABLE_MEMKIND
          KOKKOS_ENABLE_DEBUG
          KOKKOS_ENABLE_PROFILING
          KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
    if(${switch_name})
      message(STATUS " ${switch_name}")
    endif()
  endforeach()

  # CUDA-specific switches and the CUDA directory are reported only when the
  # CUDA backend itself is enabled.
  if(KOKKOS_ENABLE_CUDA)
    foreach(switch_name
            KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
            KOKKOS_ENABLE_CUDA_UVM
            KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
            KOKKOS_ENABLE_CUDA_LAMBDA)
      if(${switch_name})
        message(STATUS " ${switch_name}")
      endif()
    endforeach()
    if(KOKKOS_CUDA_DIR)
      message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}")
    endif()
  endif()

  # Remaining component directories, printed as "NAME: value" when set
  foreach(dir_name KOKKOS_QTHREADS_DIR KOKKOS_HWLOC_DIR KOKKOS_MEMKIND_DIR)
    if(${dir_name})
      message(STATUS " ${dir_name}: ${${dir_name}}")
    endif()
  endforeach()

  # ---- Final settings ----
  message(STATUS "")
  message(STATUS "Final kokkos settings variable:")
  message(STATUS " ${KOKKOS_SETTINGS}")
  message(STATUS "*****************************************************")
  message(STATUS "")
endif()

View File

@ -3,10 +3,6 @@ INCLUDE(CTest)
cmake_policy(SET CMP0054 NEW)
IF(NOT DEFINED ${PROJECT_NAME})
project(KokkosCMake)
ENDIF()
MESSAGE(WARNING "The project name is: ${PROJECT_NAME}")
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
@ -46,26 +42,26 @@ MACRO(PREPEND_GLOBAL_SET VARNAME)
GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}})
ENDMACRO()
# Remove duplicate entries from the list variable named by VARNAME and store
# the result back under the same name through GLOBAL_SET.
# VARNAME must be defined (checked with ASSERT_DEFINED); when it evaluates
# false in an IF() test (e.g. it is empty) it is left untouched.
FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME)
  ASSERT_DEFINED(${VARNAME})
  IF (NOT ${VARNAME})
    RETURN()
  ENDIF()
  SET(UNIQUE_VALUES ${${VARNAME}})
  LIST(REMOVE_DUPLICATES UNIQUE_VALUES)
  GLOBAL_SET(${VARNAME} ${UNIQUE_VALUES})
ENDFUNCTION()
#FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME)
# ASSERT_DEFINED(${VARNAME})
# IF (${VARNAME})
# SET(TMP ${${VARNAME}})
# LIST(REMOVE_DUPLICATES TMP)
# GLOBAL_SET(${VARNAME} ${TMP})
# ENDIF()
#ENDFUNCTION()
# Declare the BOOL cache option USER_OPTION_NAME (default DEFAULT_VALUE, help
# text DOCSTRING) and, when MACRO_DEFINE_NAME is a non-empty name, mirror the
# option's truth value into that variable as ON/OFF through GLOBAL_SET.
#
#   USER_OPTION_NAME  - name of the cache option to create
#   MACRO_DEFINE_NAME - name of the variable to set; pass "" to skip it
#   DOCSTRING         - help text stored with the cache entry
#   DEFAULT_VALUE     - initial value of the option
MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE)
  MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'")
  SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" )
  # Quote the macro argument: unquoted, an empty name expands to the malformed
  # test IF(NOT STREQUAL ""), and a non-empty name would be dereferenced as a
  # variable instead of being compared as text.
  IF(NOT "${MACRO_DEFINE_NAME}" STREQUAL "")
    IF(${USER_OPTION_NAME})
      GLOBAL_SET(${MACRO_DEFINE_NAME} ON)
    ELSE()
      GLOBAL_SET(${MACRO_DEFINE_NAME} OFF)
    ENDIF()
  ENDIF()
ENDMACRO()
#MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE)
# MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'")
# SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" )
# IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "")
# IF(${USER_OPTION_NAME})
# GLOBAL_SET(${MACRO_DEFINE_NAME} ON)
# ELSE()
# GLOBAL_SET(${MACRO_DEFINE_NAME} OFF)
# ENDIF()
# ENDIF()
#ENDMACRO()
FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE)
@ -77,17 +73,20 @@ FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE)
ENDFUNCTION()
# Declare the ${PROJECT_NAME}_ENABLE_DEBUG option (default OFF) by forwarding
# to TRIBITS_ADD_OPTION_AND_DEFINE, with HAVE_${PROJECT_NAME_UC}_DEBUG as the
# associated define name. Takes no arguments.
MACRO(TRIBITS_ADD_DEBUG_OPTION)
TRIBITS_ADD_OPTION_AND_DEFINE(
${PROJECT_NAME}_ENABLE_DEBUG
HAVE_${PROJECT_NAME_UC}_DEBUG
"Enable a host of runtime debug checking."
OFF
)
ENDMACRO()
#MACRO(TRIBITS_ADD_DEBUG_OPTION)
# TRIBITS_ADD_OPTION_AND_DEFINE(
# ${PROJECT_NAME}_ENABLE_DEBUG
# HAVE_${PROJECT_NAME_UC}_DEBUG
# "Enable a host of runtime debug checking."
# OFF
# )
#ENDMACRO()
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
message(STATUS "ProjectName: " ${PROJECT_NAME})
message(STATUS "Tests: " ${${PROJECT_NAME}_ENABLE_TESTS})
IF(${${PROJECT_NAME}_ENABLE_TESTS})
FOREACH(TEST_DIR ${ARGN})
ADD_SUBDIRECTORY(${TEST_DIR})
@ -387,17 +386,17 @@ FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME)
ENDFUNCTION()
# Include the TPL description file TPL_FILE and record whether it produced the
# target TPL_LIB_<name>, where <name> is the file name without directory and
# extension. Because this is a macro, TPL_NAME and TPL_ENABLE_<name> are set in
# the caller's scope.
MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE)
  GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE)
  INCLUDE("${TPL_FILE}")
  IF(NOT TARGET TPL_LIB_${TPL_NAME})
    MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}")
    SET(TPL_ENABLE_${TPL_NAME} FALSE)
  ELSE()
    MESSAGE(STATUS "Found tpl library: ${TPL_NAME}")
    SET(TPL_ENABLE_${TPL_NAME} TRUE)
  ENDIF()
ENDMACRO()
#MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE)
# GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE)
# INCLUDE("${TPL_FILE}")
# IF(TARGET TPL_LIB_${TPL_NAME})
# MESSAGE(STATUS "Found tpl library: ${TPL_NAME}")
# SET(TPL_ENABLE_${TPL_NAME} TRUE)
# ELSE()
# MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}")
# SET(TPL_ENABLE_${TPL_NAME} FALSE)
# ENDIF()
#ENDMACRO()
MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE)
IF(TYPE STREQUAL "REQUIRED")
@ -475,6 +474,7 @@ MACRO(TRIBITS_SUBPACKAGE NAME)
SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME})
SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME})
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME})
@ -494,11 +494,11 @@ MACRO(TRIBITS_PACKAGE_DECL NAME)
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake")
FOREACH(TPL_FILE ${TPLS_FILES})
TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE})
ENDFOREACH()
#SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
#FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake")
#FOREACH(TPL_FILE ${TPLS_FILES})
# TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE})
#ENDFOREACH()
ENDMACRO()

View File

@ -11,3 +11,4 @@ tag: 2.03.13 date: 07:27:2017 master: da314444 develop: 29ccb58a
tag: 2.04.00 date: 08:16:2017 master: 54eb75c0 develop: 32fb8ee1
tag: 2.04.04 date: 09:11:2017 master: 2b7e9c20 develop: 51e7b25a
tag: 2.04.11 date: 10:28:2017 master: 54a1330a develop: ed36c017
tag: 2.5.00 date: 12:15:2017 master: dfe685f4 develop: ec7ad6d8

View File

@ -39,6 +39,12 @@ cuda_args=""
# Arguments for both NVCC and Host compiler
shared_args=""
# Argument -c
compile_arg=""
# Argument -o <obj>
output_arg=""
# Linker arguments
xlinker_args=""
@ -66,6 +72,7 @@ dry_run=0
# Skip NVCC compilation and use host compiler directly
host_only=0
host_only_args=""
# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0
@ -78,6 +85,14 @@ temp_dir=${TMPDIR:-/tmp}
# Check if we have an optimization argument already
optimization_applied=0
# Check if we have -std=c++X or --std=c++X already
stdcxx_applied=0
# Run nvcc a second time to generate dependencies if needed
depfile_separate=0
depfile_output_arg=""
depfile_target_arg=""
#echo "Arguments: $# $@"
while [ $# -gt 0 ]
@ -109,12 +124,31 @@ do
fi
;;
#Handle shared args (valid for both nvcc and the host compiler)
-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
-D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
shared_args="$shared_args $1"
;;
#Handle shared args that have an argument
-o|-MT)
shared_args="$shared_args $1 $2"
#Handle compilation argument
-c)
compile_arg="$1"
;;
#Handle output argument
-o)
output_arg="$output_arg $1 $2"
shift
;;
# Handle depfile arguments. We map them to a separate call to nvcc.
-MD|-MMD)
depfile_separate=1
host_only_args="$host_only_args $1"
;;
-MF)
depfile_output_arg="-o $2"
host_only_args="$host_only_args $1 $2"
shift
;;
-MT)
depfile_target_arg="$1 $2"
host_only_args="$host_only_args $1 $2"
shift
;;
#Handle known nvcc args
@ -130,16 +164,25 @@ do
cuda_args="$cuda_args $1 $2"
shift
;;
#Handle c++11 setting
--std=c++11|-std=c++11)
shared_args="$shared_args $1"
#Handle the C++ standard flag (c++11, c++14, c++1z). Only the first one seen is
#forwarded (through shared_args, so both nvcc and the host compiler get it);
#any later one is dropped with a warning.
--std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z)
  if [ $stdcxx_applied -eq 1 ]; then
    echo "nvcc_wrapper - *warning* you have set multiple C++ standard flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting"
  else
    shared_args="$shared_args $1"
    stdcxx_applied=1
  fi
  ;;
#strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-std=c++98|--std=c++98)
;;
#strip of pedantic because it produces endless warnings about #LINE added by the preprocessor
-pedantic|-Wpedantic|-ansi)
;;
#strip of -Woverloaded-virtual to avoid "cc1: warning: command line option -Woverloaded-virtual is valid for C++/ObjC++ but not for C"
-Woverloaded-virtual)
;;
#strip -Xcompiler because we add it
-Xcompiler)
if [ $first_xcompiler_arg -eq 1 ]; then
@ -190,7 +233,7 @@ do
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
*.dylib)
@*|*.dylib)
object_files="$object_files -Xlinker $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
@ -230,7 +273,7 @@ if [ $first_xcompiler_arg -eq 0 ]; then
fi
#Compose host only command
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
if [ $replace_pragma_ident -eq 1 ]; then
@ -262,10 +305,21 @@ else
host_command="$host_command $object_files"
fi
if [ $depfile_separate -eq 1 ]; then
# run nvcc a second time to generate dependencies (without compiling)
nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg"
else
nvcc_depfile_command=""
fi
nvcc_command="$nvcc_command $compile_arg $output_arg"
#Print command for dryrun
if [ $dry_run -eq 1 ]; then
if [ $host_only -eq 1 ]; then
echo $host_command
elif [ -n "$nvcc_depfile_command" ]; then
echo $nvcc_command "&&" $nvcc_depfile_command
else
echo $nvcc_command
fi
@ -275,6 +329,8 @@ fi
#Run compilation command
if [ $host_only -eq 1 ]; then
$host_command
elif [ -n "$nvcc_depfile_command" ]; then
$nvcc_command && $nvcc_depfile_command
else
$nvcc_command
fi

View File

@ -16,12 +16,12 @@ if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
MACHINE=white
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
MACHINE=bowman
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
elif [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name
if [[ "$PROCESSOR" = "aarch64" ]]; then
MACHINE=sullivan
else
MACHINE=shepard
fi
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
MACHINE=shepard
elif [[ "$HOSTNAME" =~ apollo ]]; then
MACHINE=apollo
elif [[ "$HOSTNAME" =~ sullivan ]]; then
@ -45,7 +45,8 @@ GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS=""
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
PGI_WARNING_FLAGS=""
# Default. Machine specific can override.
DEBUG=False
@ -61,6 +62,8 @@ SPOT_CHECK=False
PRINT_HELP=False
OPT_FLAG=""
CXX_FLAGS_EXTRA=""
LD_FLAGS_EXTRA=""
KOKKOS_OPTIONS=""
#
@ -111,6 +114,12 @@ do
--with-cuda-options*)
KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
;;
--cxxflags-extra*)
CXX_FLAGS_EXTRA="${key#*=}"
;;
--ldflags-extra*)
LD_FLAGS_EXTRA="${key#*=}"
;;
--help*)
PRINT_HELP=True
;;
@ -150,20 +159,18 @@ if [ "$MACHINE" = "sems" ]; then
if [ "$SPOT_CHECK" = "True" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
"gcc/6.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
"cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
else
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
@ -184,6 +191,7 @@ elif [ "$MACHINE" = "white" ]; then
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6-BETA"
# Don't do pthread on white.
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
@ -192,6 +200,7 @@ elif [ "$MACHINE" = "white" ]; then
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
"cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
@ -210,8 +219,9 @@ elif [ "$MACHINE" = "bowman" ]; then
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
COMPILERS=("intel/16.4.258 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.2.174 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/18.0.128 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
@ -241,13 +251,13 @@ elif [ "$MACHINE" = "shepard" ]; then
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
BASE_MODULE_LIST_INTEL="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
COMPILERS=("intel/17.4.196 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/18.0.128 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
@ -280,7 +290,7 @@ elif [ "$MACHINE" = "apollo" ]; then
if [ "$SPOT_CHECK" = "True" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
@ -292,14 +302,13 @@ elif [ "$MACHINE" = "apollo" ]; then
COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"clang/4.0.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
"gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
@ -336,6 +345,8 @@ if [ "$PRINT_HELP" = "True" ]; then
echo "--dry-run: Just print what would be executed"
echo "--build-only: Just do builds, don't run anything"
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS"
echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS"
echo "--arch=ARCHITECTURE: overwrite architecture flags"
echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS"
echo "--build-list=BUILD,BUILD,BUILD..."
@ -361,14 +372,14 @@ if [ "$PRINT_HELP" = "True" ]; then
echo " Run all gcc tests"
echo " % test_all_sandia gcc"
echo ""
echo " Run all gcc/4.7.2 and all intel tests"
echo " % test_all_sandia gcc/4.7.2 intel"
echo " Run all gcc/4.8.4 and all intel tests"
echo " % test_all_sandia gcc/4.8.4 intel"
echo ""
echo " Run all tests in debug"
echo " % test_all_sandia --debug"
echo ""
echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds"
echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial"
echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds"
echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial"
echo ""
echo "If you want to kill the tests, do:"
echo " hit ctrl-z"
@ -566,10 +577,15 @@ single_build_and_test() {
if [[ "$build_type" = *debug* ]]; then
local extra_args="$extra_args --debug"
local cxxflags="-g $compiler_warning_flags"
local ldflags="-g"
else
local cxxflags="$OPT_FLAG $compiler_warning_flags"
local ldflags="${OPT_FLAG}"
fi
local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}"
local ldflags="${ldflags} ${LD_FLAGS_EXTRA}"
if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
fi
@ -586,7 +602,7 @@ single_build_and_test() {
run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
fi
else
run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
local -i build_start_time=$(date +%s)
run_cmd make -j 32 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
local -i build_end_time=$(date +%s)

View File

@ -2,7 +2,10 @@
TRIBITS_SUBPACKAGE(Containers)
ADD_SUBDIRECTORY(src)
IF(KOKKOS_HAS_TRILINOS)
ADD_SUBDIRECTORY(src)
ENDIF()
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)

View File

@ -3,6 +3,14 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
# Outside of a Trilinos build, choose the Kokkos library the performance test
# links against: "kokkoscore" when KOKKOS_SEPARATE_LIBS is on, otherwise
# "kokkos". Inside Trilinos TEST_LINK_TARGETS is not set here.
IF(NOT KOKKOS_HAS_TRILINOS)
  IF(NOT KOKKOS_SEPARATE_LIBS)
    set(TEST_LINK_TARGETS kokkos)
  ELSE()
    set(TEST_LINK_TARGETS kokkoscore)
  ENDIF()
ENDIF()
SET(SOURCES
TestMain.cpp
TestCuda.cpp
@ -24,7 +32,7 @@ TRIBITS_ADD_EXECUTABLE(
PerfTestExec
SOURCES ${SOURCES}
COMM serial mpi
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
TRIBITS_ADD_TEST(

View File

@ -15,7 +15,8 @@ endif
CXXFLAGS = -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
LDFLAGS ?=
override LDFLAGS += -lpthread
include $(KOKKOS_PATH)/Makefile.kokkos

View File

@ -180,8 +180,8 @@ void test_dynrankview_op_perf( const int par_size )
typedef DeviceType execution_space;
typedef typename execution_space::size_type size_type;
const size_type dim2 = 90;
const size_type dim3 = 30;
const size_type dim_2 = 90;
const size_type dim_3 = 30;
double elapsed_time_view = 0;
double elapsed_time_compview = 0;
@ -191,7 +191,7 @@ void test_dynrankview_op_perf( const int par_size )
double elapsed_time_compdrview = 0;
Kokkos::Timer timer;
{
Kokkos::View<double***,DeviceType> testview("testview",par_size,dim2,dim3);
Kokkos::View<double***,DeviceType> testview("testview",par_size,dim_2,dim_3);
typedef InitViewFunctor<DeviceType> FunctorType;
timer.reset();
@ -220,7 +220,7 @@ void test_dynrankview_op_perf( const int par_size )
std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl;
}
{
Kokkos::View<double*******,DeviceType> testview("testview",par_size,dim2,dim3,1,1,1,1);
Kokkos::View<double*******,DeviceType> testview("testview",par_size,dim_2,dim_3,1,1,1,1);
typedef InitViewRank7Functor<DeviceType> FunctorType;
timer.reset();
@ -231,7 +231,7 @@ void test_dynrankview_op_perf( const int par_size )
std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl;
}
{
Kokkos::DynRankView<double,DeviceType> testdrview("testdrview",par_size,dim2,dim3);
Kokkos::DynRankView<double,DeviceType> testdrview("testdrview",par_size,dim_2,dim_3);
typedef InitDynRankViewFunctor<DeviceType> FunctorType;
timer.reset();

View File

@ -54,6 +54,7 @@
#include <TestUnorderedMapPerformance.hpp>
#include <TestDynRankView.hpp>
#include <TestScatterView.hpp>
#include <iomanip>
#include <sstream>
@ -122,6 +123,18 @@ TEST_F( openmp, unordered_map_performance_far)
Perf::run_performance_tests<Kokkos::OpenMP,false>(base_file_name.str());
}
// ScatterView performance run on the OpenMP execution space with LayoutRight.
// Only the duplicated / non-atomic configuration is exercised; the
// non-duplicated / atomic variant below is currently commented out.
TEST_F( openmp, scatter_view)
{
std::cout << "ScatterView data-duplicated test:\n";
// Arguments are (m, n) of Perf::test_scatter_view: 10 and 1000 * 1000.
Perf::test_scatter_view<Kokkos::OpenMP, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterNonAtomic>(10, 1000 * 1000);
//std::cout << "ScatterView atomics test:\n";
//Perf::test_scatter_view<Kokkos::OpenMP, Kokkos::LayoutRight,
// Kokkos::Experimental::ScatterNonDuplicated,
// Kokkos::Experimental::ScatterAtomic>(10, 1000 * 1000);
}
} // namespace test
#else
void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {}

View File

@ -0,0 +1,113 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
#define KOKKOS_TEST_SCATTER_VIEW_HPP
#include <Kokkos_ScatterView.hpp>
#include <impl/Kokkos_Timer.hpp>
namespace Perf {
/// Times a ScatterView-based scatter-add against a hand-coded duplicated-array
/// version of the same kernel and prints both timings to std::cout.
///
/// \tparam ExecSpace     execution space the views and kernels run in
/// \tparam Layout        array layout of the views
/// \tparam duplication   duplication mode forwarded to create_scatter_view
/// \tparam contribution  contribution mode forwarded to create_scatter_view
/// \param m  number of back-to-back parallel_for launches per timing
/// \param n  number of rows of the n x 3 target view, and the iteration range
///
/// The pair of measurements is repeated five times. Every kernel iteration i
/// adds to rows (i + j) % n for j = 0..9.
template <typename ExecSpace, typename Layout, int duplication, int contribution>
void test_scatter_view(int m, int n)
{
  Kokkos::View<double *[3], Layout, ExecSpace> original_view("original_view", n);
  {
    auto scatter_view = Kokkos::Experimental::create_scatter_view
      < Kokkos::Experimental::ScatterSum
      , duplication
      , contribution
      > (original_view);
    Kokkos::Experimental::UniqueToken<
      ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global>
      unique_token{ExecSpace()};
    auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
    for (int foo = 0; foo < 5; ++foo) {
      // Reference: one private copy of the target per token value, indexed by
      // the token acquired inside the kernel.
      {
        auto num_threads = unique_token.size();
        std::cout << "num_threads " << num_threads << '\n';
        Kokkos::View<double **[3], Layout, ExecSpace> hand_coded_duplicate_view("hand_coded_duplicate", num_threads, n);
        auto f2 = KOKKOS_LAMBDA(int i) {
          auto thread_id = unique_token.acquire();
          for (int j = 0; j < 10; ++j) {
            auto k = (i + j) % n;
            hand_coded_duplicate_view(thread_id, k, 0) += 4.2;
            hand_coded_duplicate_view(thread_id, k, 1) += 2.0;
            hand_coded_duplicate_view(thread_id, k, 2) += 1.0;
          }
          // Every acquire() must be paired with a release() so the token can
          // be handed out again.
          unique_token.release(thread_id);
        };
        Kokkos::Timer timer;
        timer.reset();
        for (int k = 0; k < m; ++k) {
          Kokkos::parallel_for(policy, f2, "hand_coded_duplicate_scatter_view_test");
        }
        auto t = timer.seconds();
        std::cout << "hand-coded test took " << t << " seconds\n";
      }
      // Same access pattern through the ScatterView accessor.
      {
        auto f = KOKKOS_LAMBDA(int i) {
          auto scatter_access = scatter_view.access();
          for (int j = 0; j < 10; ++j) {
            auto k = (i + j) % n;
            scatter_access(k, 0) += 4.2;
            scatter_access(k, 1) += 2.0;
            scatter_access(k, 2) += 1.0;
          }
        };
        Kokkos::Timer timer;
        timer.reset();
        for (int k = 0; k < m; ++k) {
          Kokkos::parallel_for(policy, f, "scatter_view_test");
        }
        auto t = timer.seconds();
        std::cout << "test took " << t << " seconds\n";
      }
    }
  }
}
}
#endif

View File

@ -6,26 +6,42 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
#-----------------------------------------------------------------------------
SET(HEADERS "")
SET(SOURCES "")
SET(HEADERS_IMPL "")
FILE(GLOB HEADERS *.hpp)
FILE(GLOB HEADERS_IMPL impl/*.hpp)
FILE(GLOB SOURCES impl/*.cpp)
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
if(KOKKOS_LEGACY_TRIBITS)
TRIBITS_ADD_LIBRARY(
kokkoscontainers
HEADERS ${HEADERS}
NOINSTALLHEADERS ${HEADERS_IMPL}
SOURCES ${SOURCES}
DEPLIBS
)
SET(HEADERS "")
SET(SOURCES "")
SET(HEADERS_IMPL "")
FILE(GLOB HEADERS *.hpp)
FILE(GLOB HEADERS_IMPL impl/*.hpp)
FILE(GLOB SOURCES impl/*.cpp)
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
TRIBITS_ADD_LIBRARY(
kokkoscontainers
HEADERS ${HEADERS}
NOINSTALLHEADERS ${HEADERS_IMPL}
SOURCES ${SOURCES}
DEPLIBS
)
else()
INSTALL (
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION ${TRILINOS_INCDIR}
FILES_MATCHING PATTERN "*.hpp"
)
TRIBITS_ADD_LIBRARY(
kokkoscontainers
SOURCES ${KOKKOS_CONTAINERS_SRCS}
DEPLIBS
)
endif()
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,999 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_ScatterView.hpp
/// \brief Declaration and definition of Kokkos::ScatterView.
///
/// This header file declares and defines Kokkos::ScatterView and its
/// related nonmember functions.
#ifndef KOKKOS_SCATTER_VIEW_HPP
#define KOKKOS_SCATTER_VIEW_HPP
#include <Kokkos_Core.hpp>
#include <utility>
namespace Kokkos {
namespace Experimental {
//TODO: replace this enum with the Kokkos::Sum, etc reducers for parallel_reduce
// Reduction operation selector (the "Op" template argument of ScatterView).
// ScatterSum is the only operation defined here.
enum : int {
ScatterSum,
};
// Duplication selector (the "duplication" template argument of ScatterView):
// whether the ScatterView keeps a single copy of the data or several duplicates.
enum : int {
ScatterNonDuplicated = 0,
ScatterDuplicated = 1
};
// Contribution selector (the "contribution" template argument of ScatterView):
// whether updates through ScatterValue use plain or atomic read-modify-write.
enum : int {
ScatterNonAtomic = 0,
ScatterAtomic = 1
};
}} // Kokkos::Experimental
namespace Kokkos {
namespace Impl {
namespace Experimental {
// Traits giving the default "duplication" and "contribution" template arguments
// of ScatterView for a given execution space. Only the specializations below
// are defined, and each one is compiled only when its backend macro is defined;
// an execution space without a specialization has no default here.
template <typename ExecSpace>
struct DefaultDuplication;
template <typename ExecSpace, int duplication>
struct DefaultContribution;
#ifdef KOKKOS_ENABLE_SERIAL
// Serial: non-duplicated by default, non-atomic for either duplication mode.
template <>
struct DefaultDuplication<Kokkos::Serial> {
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
};
template <>
struct DefaultContribution<Kokkos::Serial, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
template <>
struct DefaultContribution<Kokkos::Serial, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
#endif
#ifdef KOKKOS_ENABLE_OPENMP
// OpenMP: duplicated by default; atomic when non-duplicated, non-atomic when
// duplicated.
template <>
struct DefaultDuplication<Kokkos::OpenMP> {
enum : int { value = Kokkos::Experimental::ScatterDuplicated };
};
template <>
struct DefaultContribution<Kokkos::OpenMP, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
template <>
struct DefaultContribution<Kokkos::OpenMP, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
#endif
#ifdef KOKKOS_ENABLE_THREADS
// Threads: same defaults as OpenMP.
template <>
struct DefaultDuplication<Kokkos::Threads> {
enum : int { value = Kokkos::Experimental::ScatterDuplicated };
};
template <>
struct DefaultContribution<Kokkos::Threads, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
template <>
struct DefaultContribution<Kokkos::Threads, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
#endif
#ifdef KOKKOS_ENABLE_CUDA
// Cuda: non-duplicated by default, atomic for either duplication mode.
template <>
struct DefaultDuplication<Kokkos::Cuda> {
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
};
template <>
struct DefaultContribution<Kokkos::Cuda, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
template <>
struct DefaultContribution<Kokkos::Cuda, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
#endif
/* ScatterValue is the object returned by the access operator() of ScatterAccess,
similar to that returned by an Atomic View, it wraps Kokkos::atomic_add with convenient
operator+=, etc. */
// Primary template; only the ScatterSum specializations below are defined.
template <typename ValueType, int Op, int contribution>
struct ScatterValue;
// Non-atomic sum: += and -= are applied directly to the referenced element.
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonAtomic> {
public:
// Wraps a reference to the element; the element is not copied.
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {}
// Move construction rebinds to the same underlying element as `other`.
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : value( other.value ) {}
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
value += rhs;
}
KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
value -= rhs;
}
private:
// Reference to the element being contributed to.
ValueType& value;
};
// Atomic sum: += and -= are both implemented with Kokkos::atomic_add.
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterAtomic> {
public:
// Wraps a reference to the element; the element is not copied.
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {}
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
Kokkos::atomic_add(&value, rhs);
}
// Subtraction is expressed as an atomic add of the negated operand.
KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
Kokkos::atomic_add(&value, -rhs);
}
private:
// Reference to the element being contributed to.
ValueType& value;
};
/* DuplicatedDataType, given a View DataType, will create a new DataType
that has a new runtime dimension which becomes the largest-stride dimension.
In the case of LayoutLeft, due to the limitation induced by the design of DataType
itself, it must convert any existing compile-time dimensions into runtime dimensions. */
// Primary template; specialized per layout and per DataType shape below.
// The result is exposed as the nested typedef value_type.
template <typename T, typename Layout>
struct DuplicatedDataType;
// LayoutRight: recursion peels [N], [] and * off T, re-applies each of them
// unchanged, and the base case adds one extra '*' to the innermost type.
// Example: double*[3] becomes double**[3].
template <typename T>
struct DuplicatedDataType<T, Kokkos::LayoutRight> {
typedef T* value_type; // For LayoutRight, add a star all the way on the left
};
template <typename T, size_t N>
struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type value_type[N];
};
template <typename T>
struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type value_type[];
};
template <typename T>
struct DuplicatedDataType<T*, Kokkos::LayoutRight> {
typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type* value_type;
};
// LayoutLeft: every peeled [N], [] or * is replaced by '*', and the base case
// adds one more '*'. Example: double*[3] becomes double***.
template <typename T>
struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
typedef T* value_type;
};
template <typename T, size_t N>
struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type* value_type;
};
template <typename T>
struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type* value_type;
};
template <typename T>
struct DuplicatedDataType<T*, Kokkos::LayoutLeft> {
typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type* value_type;
};
/* Slice is just responsible for stuffing the correct number of Kokkos::ALL
arguments on the correct side of the index in a call to subview() to get a
subview where the index specified is the largest-stride one. */
// General case: each recursion step lowers `rank` by one and adds one
// Kokkos::Impl::ALL_t to the argument pack, until a rank-1 specialization
// below performs the actual subview() call.
template <typename Layout, int rank, typename V, typename ... Args>
struct Slice {
typedef Slice<Layout, rank - 1, V, Kokkos::Impl::ALL_t, Args...> next;
typedef typename next::value_type value_type;
static
value_type get(V const& src, const size_t i, Args ... args) {
return next::get(src, i, Kokkos::ALL, args...);
}
};
// LayoutRight terminal case: the index i is passed as the FIRST subview argument.
template <typename V, typename ... Args>
struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
// Subview type for (size_t, Args...) applied to V, computed via ViewMapping.
typedef typename Kokkos::Impl::ViewMapping
< void
, V
, const size_t
, Args ...
>::type value_type;
static
value_type get(V const& src, const size_t i, Args ... args) {
return Kokkos::subview(src, i, args...);
}
};
// LayoutLeft terminal case: the index i is passed as the LAST subview argument.
template <typename V, typename ... Args>
struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
// Subview type for (Args..., size_t) applied to V, computed via ViewMapping.
typedef typename Kokkos::Impl::ViewMapping
< void
, V
, Args ...
, const size_t
>::type value_type;
static
value_type get(V const& src, const size_t i, Args ... args) {
return Kokkos::subview(src, args..., i);
}
};
// Functor that folds the duplicates of a ScatterView into a destination array;
// specialized per reduction operation.
template <typename ExecSpace, typename ValueType, int Op>
struct ReduceDuplicates;
// Base class holding the reduction arguments. Its constructor immediately
// launches a ParallelFor over the index range [0, stride) using the derived
// ReduceDuplicates functor, so constructing a ReduceDuplicates object runs the
// kernel.
template <typename ExecSpace, typename ValueType, int Op>
struct ReduceDuplicatesBase {
typedef ReduceDuplicates<ExecSpace, ValueType, Op> Derived;
ValueType const* src;  // start of the source data
ValueType* dst;        // start of the destination data
size_t stride;         // offset between consecutive duplicates; also the launch extent
size_t start;          // first duplicate index the derived functor reads
size_t n;              // one past the last duplicate index the derived functor reads
ReduceDuplicatesBase(ValueType const* src_in, ValueType* dest_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name)
: src(src_in)
, dst(dest_in)
, stride(stride_in)
, start(start_in)
, n(n_in)
{
#if defined(KOKKOS_ENABLE_PROFILING)
// Report the kernel to an attached profiling library as "reduce_<name>".
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID);
}
#endif
typedef RangePolicy<ExecSpace, size_t> policy_type;
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
// NOTE(review): `this` is cast to Derived while the base constructor is still
// running; this relies on Derived adding no state of its own -- confirm for
// any new specialization.
// The range is [0, stride), so a stride of 0 launches no iterations.
const closure_type closure(*(static_cast<Derived*>(this)), policy_type(0, stride));
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelFor(kpID);
}
#endif
}
};
/* Sum specialization of ReduceDuplicates. Construction forwards every argument
   to ReduceDuplicatesBase, which launches this functor; operator() then adds,
   for element i, the value held by each duplicate in [start, n) to dst[i]. */
template <typename ExecSpace, typename ValueType>
struct ReduceDuplicates<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum>
  : public ReduceDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum>
{
  typedef ReduceDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum> Base;

  ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name)
    : Base(src_in, dst_in, stride_in, start_in, n_in, name)
  {
  }

  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
    // Walk the duplicates in increasing order; duplicate `dup` stores element i
    // at offset i + stride * dup.
    size_t dup = this->start;
    while (dup < this->n) {
      this->dst[i] += this->src[i + this->stride * dup];
      ++dup;
    }
  }
};
// Functor that resets ScatterView storage to the identity of the reduction
// operation; specialized per operation.
template <typename ExecSpace, typename ValueType, int Op>
struct ResetDuplicates;

/* Base class holding the data pointer for a reset. Its constructor immediately
   launches a ParallelFor over the index range [0, size_in) using the derived
   ResetDuplicates functor, so constructing a ResetDuplicates object runs the
   kernel.

   data_in : start of the storage to reset
   size_in : number of elements to reset
   name    : label used to build the profiling region name */
template <typename ExecSpace, typename ValueType, int Op>
struct ResetDuplicatesBase {
  typedef ResetDuplicates<ExecSpace, ValueType, Op> Derived;
  ValueType* data;
  ResetDuplicatesBase(ValueType* data_in, size_t size_in, std::string const& name)
    : data(data_in)
  {
#if defined(KOKKOS_ENABLE_PROFILING)
    uint64_t kpID = 0;
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      // Use a "reset_" prefix so profilers can tell this kernel apart from the
      // "reduce_<name>" region reported by ReduceDuplicatesBase.
      Kokkos::Profiling::beginParallelFor(std::string("reset_") + name, 0, &kpID);
    }
#endif
    typedef RangePolicy<ExecSpace, size_t> policy_type;
    typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
    // NOTE(review): `this` is cast to Derived while the base constructor is
    // still running; this relies on Derived adding no state of its own.
    const closure_type closure(*(static_cast<Derived*>(this)), policy_type(0, size_in));
    closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      Kokkos::Profiling::endParallelFor(kpID);
    }
#endif
  }
};
// Sum specialization of ResetDuplicates: construction forwards to
// ResetDuplicatesBase (which launches this functor), and each invocation
// overwrites one element with the additive identity for ValueType.
template <typename ExecSpace, typename ValueType>
struct ResetDuplicates<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum> :
public ResetDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum>
{
typedef ResetDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum> Base;
ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name):
Base(data_in, size_in, name)
{}
KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
// reduction_identity<ValueType>::sum() supplies the value a sum starts from.
Base::data[i] = Kokkos::reduction_identity<ValueType>::sum();
}
};
}}} // Kokkos::Impl::Experimental
namespace Kokkos {
namespace Experimental {
// ScatterView primary template. Layout and ExecSpace default to those of
// Kokkos::DefaultExecutionSpace, Op defaults to ScatterSum, and the
// duplication / contribution modes default to the per-execution-space traits
// DefaultDuplication / DefaultContribution. Only partial specializations are
// defined.
template <typename DataType
,typename Layout = Kokkos::DefaultExecutionSpace::array_layout
,typename ExecSpace = Kokkos::DefaultExecutionSpace
,int Op = ScatterSum
,int duplication = Kokkos::Impl::Experimental::DefaultDuplication<ExecSpace>::value
,int contribution = Kokkos::Impl::Experimental::DefaultContribution<ExecSpace, duplication>::value
>
class ScatterView;
// ScatterAccess primary template: the object returned by ScatterView::access().
// Note the parameter order differs from ScatterView (Op comes before ExecSpace
// and Layout). override_contribution selects the ScatterValue flavor used for
// element access.
template <typename DataType
,int Op
,typename ExecSpace
,typename Layout
,int duplication
,int contribution
,int override_contribution
>
class ScatterAccess;
// non-duplicated implementation
/* ScatterView specialization that keeps a single copy of the data: the
   internal view has the same type as the original view, and a ScatterView
   constructed from a View wraps that View directly.

   Element access goes through the ScatterAccess object returned by access();
   contribute_into() adds the accumulated values into a destination view. */
template <typename DataType
         ,int Op
         ,typename ExecSpace
         ,typename Layout
         ,int contribution
         >
class ScatterView<DataType
                 ,Layout
                 ,ExecSpace
                 ,Op
                 ,ScatterNonDuplicated
                 ,contribution>
{
public:
  typedef Kokkos::View<DataType, Layout, ExecSpace> original_view_type;
  typedef typename original_view_type::value_type original_value_type;
  typedef typename original_view_type::reference_type original_reference_type;
  // ScatterAccess needs the protected at() accessor.
  friend class ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, ScatterNonAtomic>;
  friend class ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, ScatterAtomic>;

  ScatterView()
  {
  }

  // Wrap an existing view; no new allocation is requested here.
  template <typename RT, typename ... RP>
  ScatterView(View<RT, RP...> const& original_view)
    : internal_view(original_view)
  {
  }

  // Construct the internal view from a label and extents.
  template <typename ... Dims>
  ScatterView(std::string const& name, Dims ... dims)
    : internal_view(name, dims ...)
  {
  }

  // Returns an accessor; override_contrib selects atomic or non-atomic updates
  // and defaults to this view's contribution mode.
  template <int override_contrib = contribution>
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, override_contrib>
  access() const {
    return ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, override_contrib>{*this};
  }

  // With no duplicates, the "subview" is simply the internal view.
  original_view_type subview() const {
    return internal_view;
  }

  /* Adds the contents of this ScatterView into dest.
     dest must have the same layout and live in a memory space the execution
     space can access (both checked at compile time). If dest aliases the
     internal view the values are already in place and nothing is done.
     NOTE(review): dest is assumed to hold at least internal_view.size()
     elements, addressed linearly through data() -- confirm for padded or
     strided views. */
  template <typename DT, typename ... RP>
  void contribute_into(View<DT, RP...> const& dest) const
  {
    typedef View<DT, RP...> dest_type;
    static_assert(std::is_same<
        typename dest_type::array_layout,
        Layout>::value,
        "ScatterView contribute destination has different layout");
    static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
        typename ExecSpace::memory_space,
        typename dest_type::memory_space>::value,
        "ScatterView contribute destination memory space not accessible");
    if (dest.data() == internal_view.data()) return;
    // ReduceDuplicates launches over [0, stride) and, for each i, combines
    // src[i + stride * j] for j in [start, n). Passing stride == size with a
    // single "duplicate" (start 0, n 1) therefore performs dest[i] += src[i]
    // for every element. A stride of 0 would launch zero iterations and drop
    // the contribution entirely.
    Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        dest.data(),
        internal_view.size(),
        0,
        1,
        internal_view.label());
  }

  // Resets every element of the internal view to the identity of Op.
  void reset() {
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        internal_view.size(),
        internal_view.label());
  }

  // Resets the data unless `view` aliases the internal view, in which case the
  // data is left untouched.
  template <typename DT, typename ... RP>
  void reset_except(View<DT, RP...> const& view) {
    if (view.data() != internal_view.data()) reset();
  }

  // Forwards to Kokkos::resize on the internal view.
  void resize(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0,
           const size_t n7 = 0) {
    ::Kokkos::resize(internal_view,n0,n1,n2,n3,n4,n5,n6,n7);
  }

  // Forwards to Kokkos::realloc on the internal view.
  void realloc(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0,
           const size_t n7 = 0) {
    ::Kokkos::realloc(internal_view,n0,n1,n2,n3,n4,n5,n6,n7);
  }

protected:
  // Element access used by ScatterAccess; indices are passed straight through.
  template <typename ... Args>
  KOKKOS_FORCEINLINE_FUNCTION
  original_reference_type at(Args ... args) const {
    return internal_view(args...);
  }

private:
  typedef original_view_type internal_view_type;
  internal_view_type internal_view;
};
// Accessor for the non-duplicated ScatterView. It keeps a reference to the
// ScatterView it was created from and wraps each accessed element in a
// ScatterValue whose atomicity is chosen by override_contribution.
template <typename DataType
,int Op
,typename ExecSpace
,typename Layout
,int contribution
,int override_contribution
>
class ScatterAccess<DataType
,Op
,ExecSpace
,Layout
,ScatterNonDuplicated
,contribution
,override_contribution>
{
public:
typedef ScatterView<DataType, Layout, ExecSpace, Op, ScatterNonDuplicated, contribution> view_type;
typedef typename view_type::original_value_type original_value_type;
typedef Kokkos::Impl::Experimental::ScatterValue<
original_value_type, Op, override_contribution> value_type;
// Stores a reference to view_in, so view_in must outlive this accessor.
KOKKOS_INLINE_FUNCTION
ScatterAccess(view_type const& view_in)
: view(view_in)
{
}
// Multi-index access; the element reference returned by view.at() is
// implicitly wrapped in a ScatterValue.
template <typename ... Args>
KOKKOS_FORCEINLINE_FUNCTION
value_type operator()(Args ... args) const {
return view.at(args...);
}
// Subscript access, enabled only for rank-1 views and integral indices.
template <typename Arg>
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<view_type::original_view_type::rank == 1 &&
std::is_integral<Arg>::value, value_type>::type
operator[](Arg arg) const {
return view.at(arg);
}
private:
view_type const& view;
};
// duplicated implementation
// LayoutLeft and LayoutRight are different enough that we'll just specialize each
/* Duplicated ScatterView for LayoutRight. The internal view has one extra
   leading dimension (see DuplicatedDataType) whose extent is
   unique_token.size(); each index along that dimension is one duplicate of the
   data. Duplicates are combined into a destination by contribute_into(). */
template <typename DataType
         ,int Op
         ,typename ExecSpace
         ,int contribution
         >
class ScatterView<DataType
                 ,Kokkos::LayoutRight
                 ,ExecSpace
                 ,Op
                 ,ScatterDuplicated
                 ,contribution>
{
public:
  typedef Kokkos::View<DataType, Kokkos::LayoutRight, ExecSpace> original_view_type;
  typedef typename original_view_type::value_type original_value_type;
  typedef typename original_view_type::reference_type original_reference_type;
  // ScatterAccess needs the protected at() accessor and the unique token.
  friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, ScatterNonAtomic>;
  friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, ScatterAtomic>;
  typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<DataType, Kokkos::LayoutRight> data_type_info;
  typedef typename data_type_info::value_type internal_data_type;
  typedef Kokkos::View<internal_data_type, Kokkos::LayoutRight, ExecSpace> internal_view_type;

  ScatterView()
  {
  }

  // Allocates (without initializing) duplicated storage sized from the extents
  // of original_view, labelled "duplicated_<label>", then resets it.
  template <typename RT, typename ... RP >
  ScatterView(View<RT, RP...> const& original_view)
    : unique_token()
    , internal_view(Kokkos::ViewAllocateWithoutInitializing(
                      std::string("duplicated_") + original_view.label()),
                    unique_token.size(),
                    original_view.extent(0),
                    original_view.extent(1),
                    original_view.extent(2),
                    original_view.extent(3),
                    original_view.extent(4),
                    original_view.extent(5),
                    original_view.extent(6))
  {
    reset();
  }

  // Allocates (without initializing) duplicated storage from a label and the
  // original extents, then resets it. unique_token is declared before
  // internal_view, so it is already constructed when its size is read here.
  template <typename ... Dims>
  ScatterView(std::string const& name, Dims ... dims)
    : internal_view(Kokkos::ViewAllocateWithoutInitializing(name), unique_token.size(), dims ...)
  {
    reset();
  }

  // Returns an accessor; override_contribution selects atomic or non-atomic
  // updates and defaults to this view's contribution mode.
  template <int override_contribution = contribution>
  inline
  ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, override_contribution>
  access() const {
    return ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, override_contribution>{*this};
  }

  // Returns the subview holding duplicate 0. The same `rank` constant is used
  // for the declared return type and for the call so the two cannot diverge.
  typename Kokkos::Impl::Experimental::Slice<
    Kokkos::LayoutRight, internal_view_type::rank, internal_view_type>::value_type
  subview() const
  {
    return Kokkos::Impl::Experimental::Slice<
      Kokkos::LayoutRight, internal_view_type::rank, internal_view_type>::get(internal_view, 0);
  }

  /* Adds all duplicates into dest.
     dest must be LayoutRight and live in a memory space the execution space
     can access (both checked at compile time). When dest aliases the internal
     storage, duplicate 0 already lives in dest, so the sum starts at
     duplicate 1. */
  template <typename DT, typename ... RP>
  void contribute_into(View<DT, RP...> const& dest) const
  {
    typedef View<DT, RP...> dest_type;
    static_assert(std::is_same<
        typename dest_type::array_layout,
        Kokkos::LayoutRight>::value,
        "ScatterView contribute destination has different layout");
    static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
        typename ExecSpace::memory_space,
        typename dest_type::memory_space>::value,
        "ScatterView contribute destination memory space not accessible");
    size_t strides[8];
    internal_view.stride(strides);
    bool is_equal = (dest.data() == internal_view.data());
    size_t start = is_equal ? 1 : 0;
    // strides[0] is the stride of the leading (duplicate) dimension;
    // extent(0) is the number of duplicates.
    Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        dest.data(),
        strides[0],
        start,
        internal_view.extent(0),
        internal_view.label());
  }

  // Resets every element of every duplicate to the identity of Op.
  void reset() {
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        internal_view.size(),
        internal_view.label());
  }

  // Resets everything, except that when `view` aliases the internal storage
  // its first view.size() elements are left untouched.
  template <typename DT, typename ... RP>
  void reset_except(View<DT, RP...> const& view) {
    if (view.data() != internal_view.data()) {
      reset();
      return;
    }
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data() + view.size(),
        internal_view.size() - view.size(),
        internal_view.label());
  }

  // Forwards to Kokkos::resize, keeping the leading duplicate dimension.
  void resize(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0) {
    ::Kokkos::resize(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6);
  }

  // Forwards to Kokkos::realloc, keeping the leading duplicate dimension.
  void realloc(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0) {
    ::Kokkos::realloc(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6);
  }

protected:
  // Element access used by ScatterAccess: `rank` is the duplicate index and is
  // passed as the first index of the internal view.
  template <typename ... Args>
  KOKKOS_FORCEINLINE_FUNCTION
  original_reference_type at(int rank, Args ... args) const {
    return internal_view(rank, args...);
  }

protected:
  typedef Kokkos::Experimental::UniqueToken<
    ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;
  // Declaration order matters: unique_token must precede internal_view because
  // the constructors read unique_token.size() while initializing internal_view.
  unique_token_type unique_token;
  internal_view_type internal_view;
};
template <typename DataType
,int Op
,typename ExecSpace
,int contribution
>
class ScatterView<DataType
,Kokkos::LayoutLeft
,ExecSpace
,Op
,ScatterDuplicated
,contribution>
{
public:
typedef Kokkos::View<DataType, Kokkos::LayoutLeft, ExecSpace> original_view_type;
typedef typename original_view_type::value_type original_value_type;
typedef typename original_view_type::reference_type original_reference_type;
friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, ScatterNonAtomic>;
friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, ScatterAtomic>;
typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<DataType, Kokkos::LayoutLeft> data_type_info;
typedef typename data_type_info::value_type internal_data_type;
typedef Kokkos::View<internal_data_type, Kokkos::LayoutLeft, ExecSpace> internal_view_type;
ScatterView()
{
}
template <typename RT, typename ... RP >
ScatterView(View<RT, RP...> const& original_view)
: unique_token()
{
size_t arg_N[8] = {
original_view.extent(0),
original_view.extent(1),
original_view.extent(2),
original_view.extent(3),
original_view.extent(4),
original_view.extent(5),
original_view.extent(6),
0
};
arg_N[internal_view_type::rank - 1] = unique_token.size();
internal_view = internal_view_type(
Kokkos::ViewAllocateWithoutInitializing(
std::string("duplicated_") + original_view.label()),
arg_N[0], arg_N[1], arg_N[2], arg_N[3],
arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
reset();
}
template <typename ... Dims>
ScatterView(std::string const& name, Dims ... dims)
: internal_view(Kokkos::ViewAllocateWithoutInitializing(name), dims ..., unique_token.size())
{
reset();
}
template <int override_contribution = contribution>
inline
ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, override_contribution>
access() const {
return ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, override_contribution>{*this};
}
typename Kokkos::Impl::Experimental::Slice<
Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::value_type
subview() const
{
return Kokkos::Impl::Experimental::Slice<
Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::get(internal_view, 0);
}
template <typename ... RP>
void contribute_into(View<DataType, RP...> const& dest) const
{
typedef View<DataType, RP...> dest_type;
static_assert(std::is_same<
typename dest_type::array_layout,
Kokkos::LayoutLeft>::value,
"ScatterView deep_copy destination has different layout");
static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
typename ExecSpace::memory_space,
typename dest_type::memory_space>::value,
"ScatterView deep_copy destination memory space not accessible");
size_t strides[8];
internal_view.stride(strides);
size_t stride = strides[internal_view_type::rank - 1];
auto extent = internal_view.extent(
internal_view_type::rank - 1);
bool is_equal = (dest.data() == internal_view.data());
size_t start = is_equal ? 1 : 0;
Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
internal_view.data(),
dest.data(),
stride,
start,
extent,
internal_view.label());
}
void reset() {
Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
internal_view.data(),
internal_view.size(),
internal_view.label());
}
template <typename DT, typename ... RP>
void reset_except(View<DT, RP...> const& view) {
if (view.data() != internal_view.data()) {
reset();
return;
}
Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
internal_view.data() + view.size(),
internal_view.size() - view.size(),
internal_view.label());
}
void resize(const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
const size_t n6 = 0) {
size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0};
const int i = internal_view.rank-1;
arg_N[i] = unique_token.size();
::Kokkos::resize(internal_view,
arg_N[0], arg_N[1], arg_N[2], arg_N[3],
arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
}
void realloc(const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
const size_t n6 = 0) {
size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0};
const int i = internal_view.rank-1;
arg_N[i] = unique_token.size();
::Kokkos::realloc(internal_view,
arg_N[0], arg_N[1], arg_N[2], arg_N[3],
arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
}
protected:
template <typename ... Args>
inline original_reference_type at(int thread_id, Args ... args) const {
return internal_view(args..., thread_id);
}
protected:
typedef Kokkos::Experimental::UniqueToken<
ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;
unique_token_type unique_token;
internal_view_type internal_view;
};
/* This object has to be separate in order to store the thread ID, which cannot
be obtained until one is inside a parallel construct, and may be relatively
expensive to obtain at every contribution
(calls a non-inlined function, looks up a thread-local variable).
Due to the expense, it is sensible to query it at most once per parallel iterate
(ideally once per thread, but parallel_for doesn't expose that)
and then store it in a stack variable.
ScatterAccess serves as a non-const object on the stack which can store the thread ID */
template <typename DataType
,int Op
,typename ExecSpace
,typename Layout
,int contribution
,int override_contribution
>
class ScatterAccess<DataType
,Op
,ExecSpace
,Layout
,ScatterDuplicated
,contribution
,override_contribution>
{
public:
typedef ScatterView<DataType, Layout, ExecSpace, Op, ScatterDuplicated, contribution> view_type;
typedef typename view_type::original_value_type original_value_type;
typedef Kokkos::Impl::Experimental::ScatterValue<
original_value_type, Op, override_contribution> value_type;
// Acquires a token from the view's UniqueToken; the token value is used as the
// duplicate index for every access made through this object.
inline ScatterAccess(view_type const& view_in)
: view(view_in)
, thread_id(view_in.unique_token.acquire()) {
}
// Releases the token unless this object has been moved from (a moved-from
// object carries the all-ones sentinel set by the move constructor).
inline ~ScatterAccess() {
if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id);
}
// Multi-index access into this accessor's duplicate; the element reference is
// implicitly wrapped in a ScatterValue.
template <typename ... Args>
KOKKOS_FORCEINLINE_FUNCTION
value_type operator()(Args ... args) const {
return view.at(thread_id, args...);
}
// Subscript access, enabled only for rank-1 original views and integral indices.
template <typename Arg>
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<view_type::original_view_type::rank == 1 &&
std::is_integral<Arg>::value, value_type>::type
operator[](Arg arg) const {
return view.at(thread_id, arg);
}
private:
// Reference to the owning ScatterView, which must outlive this accessor.
view_type const& view;
// simplify RAII by disallowing copies
ScatterAccess(ScatterAccess const& other) = delete;
ScatterAccess& operator=(ScatterAccess const& other) = delete;
ScatterAccess& operator=(ScatterAccess&& other) = delete;
public:
// do need to allow moves though, for the common
// auto b = a.access();
// that assignment turns into a move constructor call
inline ScatterAccess(ScatterAccess&& other)
: view(other.view)
, thread_id(other.thread_id)
{
// Mark the source as moved-from so its destructor does not release the token.
other.thread_id = ~thread_id_type(0);
}
private:
typedef typename view_type::unique_token_type unique_token_type;
typedef typename unique_token_type::size_type thread_id_type;
// Token acquired in the constructor, or the all-ones sentinel after a move.
thread_id_type thread_id;
};
// Builds a ScatterView from an existing View. The data type, layout and
// execution space are taken from the View's traits. Op, duplication and
// contribution may be given explicitly as template arguments; a value of -1
// (the default) for duplication or contribution selects the per-execution-space
// default from DefaultDuplication / DefaultContribution.
template <int Op = Kokkos::Experimental::ScatterSum,
int duplication = -1,
int contribution = -1,
typename RT, typename ... RP>
ScatterView
< RT
, typename ViewTraits<RT, RP...>::array_layout
, typename ViewTraits<RT, RP...>::execution_space
, Op
/* just setting defaults if not specified... things got messy because the view type
does not come before the duplication/contribution settings in the
template parameter list */
, duplication == -1 ? Kokkos::Impl::Experimental::DefaultDuplication<typename ViewTraits<RT, RP...>::execution_space>::value : duplication
// The default contribution depends on the duplication actually selected, so
// the same duplication fallback is repeated inside the lookup below.
, contribution == -1 ?
Kokkos::Impl::Experimental::DefaultContribution<
typename ViewTraits<RT, RP...>::execution_space,
(duplication == -1 ?
Kokkos::Impl::Experimental::DefaultDuplication<
typename ViewTraits<RT, RP...>::execution_space
>::value
: duplication
)
>::value
: contribution
>
create_scatter_view(View<RT, RP...> const& original_view) {
return original_view; // implicit ScatterView constructor call
}
}} // namespace Kokkos::Experimental
namespace Kokkos {
namespace Experimental {
/// Adds the values accumulated in \a src into \a dest by forwarding to
/// ScatterView::contribute_into.
template <typename DestData,
          typename SrcData,
          typename SrcLayout,
          typename SrcSpace,
          int SrcOp,
          int SrcDuplication,
          int SrcContribution,
          typename ... DestProps>
void contribute(
    View<DestData, DestProps...>& dest,
    Kokkos::Experimental::ScatterView<
      SrcData, SrcLayout, SrcSpace, SrcOp, SrcDuplication, SrcContribution> const& src)
{
  src.contribute_into(dest);
}
}} // namespace Kokkos::Experimental
namespace Kokkos {
/// Kokkos::realloc overload for ScatterView: forwards the requested extents to
/// the ScatterView's own realloc member.
template <typename Data,
          typename ArrayLayout,
          typename Space,
          int Op,
          int Duplication,
          int Contribution,
          typename ... Extents>
void realloc(
    Kokkos::Experimental::ScatterView<
      Data, ArrayLayout, Space, Op, Duplication, Contribution>& scatter_view,
    Extents ... is)
{
  scatter_view.realloc(is ...);
}
/// Kokkos::resize overload for ScatterView: forwards the requested extents to
/// the ScatterView's own resize member.
template <typename Data,
          typename ArrayLayout,
          typename Space,
          int Op,
          int Duplication,
          int Contribution,
          typename ... Extents>
void resize(
    Kokkos::Experimental::ScatterView<
      Data, ArrayLayout, Space, Op, Duplication, Contribution>& scatter_view,
    Extents ... is)
{
  scatter_view.resize(is ...);
}
} // namespace Kokkos
#endif

View File

@ -56,6 +56,7 @@
template< class Scalar, class Arg1Type = void>
class vector : public DualView<Scalar*,LayoutLeft,Arg1Type> {
public:
typedef Scalar value_type;
typedef Scalar* pointer;
typedef const Scalar* const_pointer;

View File

@ -3,7 +3,13 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
SET(LIBRARIES kokkoscore)
# Choose the extra library passed to the test executables via TESTONLYLIBS:
# kokkoscore when KOKKOS_SEPARATE_LIBS is set, otherwise kokkos.
# When KOKKOS_HAS_TRILINOS is set, TEST_LINK_TARGETS is not assigned here, so
# ${TEST_LINK_TARGETS} expands to nothing unless it is set elsewhere.
IF(NOT KOKKOS_HAS_TRILINOS)
IF(KOKKOS_SEPARATE_LIBS)
set(TEST_LINK_TARGETS kokkoscore)
ELSE()
set(TEST_LINK_TARGETS kokkos)
ENDIF()
ENDIF()
IF(Kokkos_ENABLE_Pthread)
TRIBITS_ADD_EXECUTABLE_AND_TEST(
@ -12,7 +18,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -23,7 +29,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -34,7 +40,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -45,7 +51,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()

View File

@ -15,7 +15,8 @@ endif
CXXFLAGS = -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
LDFLAGS ?=
override LDFLAGS += -lpthread
include $(KOKKOS_PATH)/Makefile.kokkos

View File

@ -62,6 +62,7 @@
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestDynamicView.hpp>
#include <TestScatterView.hpp>
#include <Kokkos_DynRankView.hpp>
#include <TestDynViewAPI.hpp>
@ -201,10 +202,18 @@ void cuda_test_bitset()
cuda_test_dualview_combinations(size); \
}
// Defines a gtest case named scatterview_<size>x in the `cuda` fixture that
// calls test_scatter_view<Kokkos::Cuda>(size).
#define CUDA_SCATTERVIEW_TEST( size ) \
TEST_F( cuda, scatterview_##size##x) { \
test_scatter_view<Kokkos::Cuda>(size); \
}
CUDA_DUALVIEW_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 3057 )
CUDA_SCATTERVIEW_TEST( 10 )
CUDA_SCATTERVIEW_TEST( 1000000 )
CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)

View File

@ -63,6 +63,8 @@
#include <Kokkos_DynRankView.hpp>
#include <TestDynViewAPI.hpp>
#include <TestScatterView.hpp>
#include <Kokkos_ErrorReporter.hpp>
#include <TestErrorReporter.hpp>
@ -152,6 +154,11 @@ TEST_F( openmp , staticcrsgraph )
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
}
// Defines a gtest case named scatterview_<size>x in the `openmp` fixture that
// calls test_scatter_view<Kokkos::OpenMP>(size).
#define OPENMP_SCATTERVIEW_TEST( size ) \
TEST_F( openmp, scatterview_##size##x) { \
test_scatter_view<Kokkos::OpenMP>(size); \
}
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
@ -161,6 +168,10 @@ OPENMP_VECTOR_COMBINE_TEST( 10 )
OPENMP_VECTOR_COMBINE_TEST( 3057 )
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
OPENMP_SCATTERVIEW_TEST( 10 )
OPENMP_SCATTERVIEW_TEST( 1000000 )
#undef OPENMP_INSERT_TEST
#undef OPENMP_FAILED_INSERT_TEST
#undef OPENMP_ASSIGNEMENT_TEST

View File

@ -0,0 +1,156 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
#define KOKKOS_TEST_SCATTER_VIEW_HPP
#include <Kokkos_ScatterView.hpp>
#include <cmath>
namespace Test {
template <typename ExecSpace, typename Layout, int duplication, int contribution>
void test_scatter_view_config(int n)
{
Kokkos::View<double *[3], Layout, ExecSpace> original_view("original_view", n);
{
auto scatter_view = Kokkos::Experimental::create_scatter_view
< Kokkos::Experimental::ScatterSum
, duplication
, contribution
> (original_view);
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
auto f = KOKKOS_LAMBDA(int i) {
auto scatter_access = scatter_view.access();
auto scatter_access_atomic = scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
for (int j = 0; j < 10; ++j) {
auto k = (i + j) % n;
scatter_access(k, 0) += 4.2;
scatter_access_atomic(k, 1) += 2.0;
scatter_access(k, 2) += 1.0;
}
};
Kokkos::parallel_for(policy, f, "scatter_view_test");
#endif
Kokkos::Experimental::contribute(original_view, scatter_view);
scatter_view.reset_except(original_view);
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
Kokkos::parallel_for(policy, f, "scatter_view_test");
#endif
Kokkos::Experimental::contribute(original_view, scatter_view);
}
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), original_view);
for (typename decltype(host_view)::size_type i = 0; i < host_view.dimension_0(); ++i) {
auto val0 = host_view(i, 0);
auto val1 = host_view(i, 1);
auto val2 = host_view(i, 2);
EXPECT_TRUE(std::fabs((val0 - 84.0) / 84.0) < 1e-15);
EXPECT_TRUE(std::fabs((val1 - 40.0) / 40.0) < 1e-15);
EXPECT_TRUE(std::fabs((val2 - 20.0) / 20.0) < 1e-15);
}
#endif
{
Kokkos::Experimental::ScatterView
< double*[3]
, Layout
, ExecSpace
, Kokkos::Experimental::ScatterSum
, duplication
, contribution
>
persistent_view("persistent", n);
auto result_view = persistent_view.subview();
contribute(result_view, persistent_view);
}
}
template <typename ExecSpace>
struct TestDuplicatedScatterView {
TestDuplicatedScatterView(int n) {
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterNonAtomic>(n);
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterAtomic>(n);
}
};
#ifdef KOKKOS_ENABLE_CUDA
// disable duplicated instantiation with CUDA until
// UniqueToken can support it
template <>
struct TestDuplicatedScatterView<Kokkos::Cuda> {
TestDuplicatedScatterView(int) {
}
};
#endif
template <typename ExecSpace>
void test_scatter_view(int n)
{
// all of these configurations should compile okay, but only some of them are
// correct and/or sensible in terms of memory use
Kokkos::Experimental::UniqueToken<ExecSpace> unique_token{ExecSpace()};
// no atomics or duplication is only sensible if the execution space
// is running essentially in serial (doesn't have to be Serial though,
// we also test OpenMP with one thread: LAMMPS cares about that)
if (unique_token.size() == 1) {
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterNonDuplicated,
Kokkos::Experimental::ScatterNonAtomic>(n);
}
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterNonDuplicated,
Kokkos::Experimental::ScatterAtomic>(n);
TestDuplicatedScatterView<ExecSpace> duptest(n);
}
} // namespace Test
#endif //KOKKOS_TEST_SCATTER_VIEW_HPP

View File

@ -58,6 +58,7 @@
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestDynamicView.hpp>
#include <TestScatterView.hpp>
#include <iomanip>
@ -148,6 +149,11 @@ TEST_F( serial, bitset )
test_dualview_combinations<int,Kokkos::Serial>(size); \
}
// Defines a gtest case in the `serial` fixture whose name is formed by pasting
// the size between `scatterview_` and `x` (e.g. scatterview_10x); the case
// calls test_scatter_view<Kokkos::Serial>(size).
#define SERIAL_SCATTERVIEW_TEST( size ) \
TEST_F( serial, scatterview_##size##x) { \
test_scatter_view<Kokkos::Serial>(size); \
}
SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
@ -157,6 +163,10 @@ SERIAL_VECTOR_COMBINE_TEST( 10 )
SERIAL_VECTOR_COMBINE_TEST( 3057 )
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
SERIAL_SCATTERVIEW_TEST( 10 )
SERIAL_SCATTERVIEW_TEST( 1000000 )
#undef SERIAL_INSERT_TEST
#undef SERIAL_FAILED_INSERT_TEST
#undef SERIAL_ASSIGNEMENT_TEST

View File

@ -2,7 +2,9 @@
TRIBITS_SUBPACKAGE(Core)
ADD_SUBDIRECTORY(src)
IF(KOKKOS_HAS_TRILINOS)
ADD_SUBDIRECTORY(src)
ENDIF()
TRIBITS_ADD_TEST_DIRECTORIES(unit_test)
TRIBITS_ADD_TEST_DIRECTORIES(perf_test)

View File

@ -2,6 +2,14 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
# Outside of a Trilinos build, pick the Kokkos library that the test
# executable links through TESTONLYLIBS: kokkoscore when the libraries are
# built separately, the combined kokkos library otherwise. Inside Trilinos
# TEST_LINK_TARGETS is left untouched.
IF(NOT KOKKOS_HAS_TRILINOS)
  SET(TEST_LINK_TARGETS kokkos)
  IF(KOKKOS_SEPARATE_LIBS)
    SET(TEST_LINK_TARGETS kokkoscore)
  ENDIF()
ENDIF()
# warning: PerfTest_CustomReduction.cpp uses
# ../../algorithms/src/Kokkos_Random.hpp
# we'll just allow it to be included, but note
@ -23,7 +31,7 @@ TRIBITS_ADD_EXECUTABLE(
PerfTestExec
SOURCES ${SOURCES}
COMM serial mpi
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
TRIBITS_ADD_TEST(

View File

@ -17,7 +17,8 @@ endif
CXXFLAGS = -O3
#CXXFLAGS += -DGENERIC_REDUCER
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
LDFLAGS ?=
override LDFLAGS += -lpthread
include $(KOKKOS_PATH)/Makefile.kokkos

View File

@ -1,15 +1,4 @@
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Serial
KOKKOS_HAVE_SERIAL
"Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads."
ON
)
ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11)
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA)
TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
@ -20,68 +9,90 @@ SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DI
#-----------------------------------------------------------------------------
SET(HEADERS_PUBLIC "")
SET(HEADERS_PRIVATE "")
SET(SOURCES "")
IF(KOKKOS_LEGACY_TRIBITS)
FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp)
LIST( APPEND HEADERS_PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h )
ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11)
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA)
SET(HEADERS_PUBLIC "")
SET(HEADERS_PRIVATE "")
SET(SOURCES "")
FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp)
LIST( APPEND HEADERS_PUBLIC ${CMAKE_BINARY_DIR}/${PACKAGE_NAME}_config.h )
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_IMPL impl/*.hpp)
FILE(GLOB SOURCES_IMPL impl/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} )
LIST(APPEND SOURCES ${SOURCES_IMPL} )
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_THREADS Threads/*.hpp)
FILE(GLOB SOURCES_THREADS Threads/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} )
LIST(APPEND SOURCES ${SOURCES_THREADS} )
INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/)
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp)
FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} )
LIST(APPEND SOURCES ${SOURCES_OPENMP} )
INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/)
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_CUDA Cuda/*.hpp)
FILE(GLOB SOURCES_CUDA Cuda/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} )
LIST(APPEND SOURCES ${SOURCES_CUDA} )
INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/)
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp)
FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} )
LIST(APPEND SOURCES ${SOURCES_QTHREADS} )
INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/)
TRIBITS_ADD_LIBRARY(
kokkoscore
HEADERS ${HEADERS_PUBLIC}
NOINSTALLHEADERS ${HEADERS_PRIVATE}
SOURCES ${SOURCES}
DEPLIBS
)
#-----------------------------------------------------------------------------
# In the new build system, sources are calculated by Makefile.kokkos
else()
FILE(GLOB HEADERS_IMPL impl/*.hpp)
FILE(GLOB SOURCES_IMPL impl/*.cpp)
INSTALL (DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION ${TRILINOS_INCDIR}
FILES_MATCHING PATTERN "*.hpp"
)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} )
LIST(APPEND SOURCES ${SOURCES_IMPL} )
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
TRIBITS_ADD_LIBRARY(
kokkoscore
SOURCES ${KOKKOS_CORE_SRCS}
DEPLIBS
)
endif()
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_THREADS Threads/*.hpp)
FILE(GLOB SOURCES_THREADS Threads/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} )
LIST(APPEND SOURCES ${SOURCES_THREADS} )
INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/)
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp)
FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} )
LIST(APPEND SOURCES ${SOURCES_OPENMP} )
INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/)
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_CUDA Cuda/*.hpp)
FILE(GLOB SOURCES_CUDA Cuda/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} )
LIST(APPEND SOURCES ${SOURCES_CUDA} )
INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/)
#-----------------------------------------------------------------------------
FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp)
FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp)
LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} )
LIST(APPEND SOURCES ${SOURCES_QTHREADS} )
INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/)
#-----------------------------------------------------------------------------
TRIBITS_ADD_LIBRARY(
kokkoscore
HEADERS ${HEADERS_PUBLIC}
NOINSTALLHEADERS ${HEADERS_PRIVATE}
SOURCES ${SOURCES}
DEPLIBS
)

View File

@ -366,7 +366,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::
if(Kokkos::Profiling::profileLibraryLoaded()) {
SharedAllocationHeader header ;
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) );
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) );
Kokkos::Profiling::deallocateData(
Kokkos::Profiling::SpaceHandle(Kokkos::CudaSpace::name()),header.m_label,
@ -446,7 +446,7 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
);
// Copy to device memory
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
@ -655,7 +655,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr
Header const * const head_cuda = alloc_ptr ? Header::get_header( alloc_ptr ) : (Header*) 0 ;
if ( alloc_ptr ) {
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>( & head , head_cuda , sizeof(SharedAllocationHeader) );
}
RecordCuda * const record = alloc_ptr ? static_cast< RecordCuda * >( head.m_record ) : (RecordCuda *) 0 ;
@ -724,7 +724,7 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & , bool detail )
if ( detail ) {
do {
if ( r->m_alloc_ptr ) {
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
}
else {
head.m_label[0] = 0 ;
@ -759,7 +759,7 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & , bool detail )
do {
if ( r->m_alloc_ptr ) {
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
//Formatting dependent on sizeof(uintptr_t)
const char * format_string;

View File

@ -648,10 +648,11 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
public:
@ -721,7 +722,7 @@ public:
}
// Reduce with final value at blockDim.y - 1 location.
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTag>(
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTagFwd>(
ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x ,
kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) {
@ -731,7 +732,7 @@ public:
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
if ( threadIdx.y == 0 ) {
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
}
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
@ -766,11 +767,11 @@ public:
value_type init;
ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init);
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTag>
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTagFwd>
(value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) {
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
if(id==0) {
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
*result = value;
}
}
@ -875,10 +876,11 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
public:
@ -942,7 +944,7 @@ public:
// Reduce with final value at blockDim.y - 1 location.
// Problem: non power-of-two blockDim
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTag>(
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTagFwd>(
ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x ,
kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) {
@ -951,7 +953,7 @@ public:
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
if ( threadIdx.y == 0 ) {
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
}
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
@ -983,11 +985,11 @@ public:
value_type init;
ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init);
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTag>
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTagFwd>
(value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) {
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
if(id==0) {
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
*result = value;
}
}
@ -1100,10 +1102,11 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
@ -1222,7 +1225,7 @@ public:
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
if ( threadIdx.y == 0 ) {
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
}
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
@ -1260,7 +1263,7 @@ public:
(value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y)) {
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
if(id==0) {
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
*result = value;
}
}

View File

@ -69,7 +69,7 @@ void cuda_shfl( T & out , T const & in , int lane ,
typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
{
*reinterpret_cast<int*>(&out) =
__shfl( *reinterpret_cast<int const *>(&in) , lane , width );
KOKKOS_IMPL_CUDA_SHFL( *reinterpret_cast<int const *>(&in) , lane , width );
}
template< typename T >
@ -83,7 +83,7 @@ void cuda_shfl( T & out , T const & in , int lane ,
for ( int i = 0 ; i < N ; ++i ) {
reinterpret_cast<int*>(&out)[i] =
__shfl( reinterpret_cast<int const *>(&in)[i] , lane , width );
KOKKOS_IMPL_CUDA_SHFL( reinterpret_cast<int const *>(&in)[i] , lane , width );
}
}
@ -95,7 +95,7 @@ void cuda_shfl_down( T & out , T const & in , int delta ,
typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
{
*reinterpret_cast<int*>(&out) =
__shfl_down( *reinterpret_cast<int const *>(&in) , delta , width );
KOKKOS_IMPL_CUDA_SHFL_DOWN( *reinterpret_cast<int const *>(&in) , delta , width );
}
template< typename T >
@ -109,7 +109,7 @@ void cuda_shfl_down( T & out , T const & in , int delta ,
for ( int i = 0 ; i < N ; ++i ) {
reinterpret_cast<int*>(&out)[i] =
__shfl_down( reinterpret_cast<int const *>(&in)[i] , delta , width );
KOKKOS_IMPL_CUDA_SHFL_DOWN( reinterpret_cast<int const *>(&in)[i] , delta , width );
}
}
@ -121,7 +121,7 @@ void cuda_shfl_up( T & out , T const & in , int delta ,
typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
{
*reinterpret_cast<int*>(&out) =
__shfl_up( *reinterpret_cast<int const *>(&in) , delta , width );
KOKKOS_IMPL_CUDA_SHFL_UP( *reinterpret_cast<int const *>(&in) , delta , width );
}
template< typename T >
@ -135,7 +135,7 @@ void cuda_shfl_up( T & out , T const & in , int delta ,
for ( int i = 0 ; i < N ; ++i ) {
reinterpret_cast<int*>(&out)[i] =
__shfl_up( reinterpret_cast<int const *>(&in)[i] , delta , width );
KOKKOS_IMPL_CUDA_SHFL_UP( reinterpret_cast<int const *>(&in)[i] , delta , width );
}
}
@ -268,31 +268,31 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
if( id + 1 < int(gridDim.x) )
join(value, tmp);
}
int active = __ballot(1);
int active = KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 2) {
value_type tmp = Kokkos::shfl_down(value, 2,32);
if( id + 2 < int(gridDim.x) )
join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 4) {
value_type tmp = Kokkos::shfl_down(value, 4,32);
if( id + 4 < int(gridDim.x) )
join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 8) {
value_type tmp = Kokkos::shfl_down(value, 8,32);
if( id + 8 < int(gridDim.x) )
join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 16) {
value_type tmp = Kokkos::shfl_down(value, 16,32);
if( id + 16 < int(gridDim.x) )
join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
}
}
//The last block has in its thread=0 the global reduction value through "value"
@ -432,31 +432,31 @@ cuda_inter_block_reduction( const ReducerType& reducer,
if( id + 1 < int(gridDim.x) )
reducer.join(value, tmp);
}
int active = __ballot(1);
int active = KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 2) {
value_type tmp = Kokkos::shfl_down(value, 2,32);
if( id + 2 < int(gridDim.x) )
reducer.join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 4) {
value_type tmp = Kokkos::shfl_down(value, 4,32);
if( id + 4 < int(gridDim.x) )
reducer.join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 8) {
value_type tmp = Kokkos::shfl_down(value, 8,32);
if( id + 8 < int(gridDim.x) )
reducer.join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
if (int(blockDim.x*blockDim.y) > 16) {
value_type tmp = Kokkos::shfl_down(value, 16,32);
if( id + 16 < int(gridDim.x) )
reducer.join(value, tmp);
}
active += __ballot(1);
active += KOKKOS_IMPL_CUDA_BALLOT(1);
}
}

View File

@ -73,16 +73,16 @@ public:
KOKKOS_INLINE_FUNCTION
UniqueToken() : m_buffer(0), m_count(0) {}
KOKKOS_INLINE_FUNCTION
KOKKOS_FUNCTION_DEFAULTED
UniqueToken( const UniqueToken & ) = default;
KOKKOS_INLINE_FUNCTION
KOKKOS_FUNCTION_DEFAULTED
UniqueToken( UniqueToken && ) = default;
KOKKOS_INLINE_FUNCTION
KOKKOS_FUNCTION_DEFAULTED
UniqueToken & operator=( const UniqueToken & ) = default ;
KOKKOS_INLINE_FUNCTION
KOKKOS_FUNCTION_DEFAULTED
UniqueToken & operator=( UniqueToken && ) = default ;
/// \brief upper bound for acquired values, i.e. 0 <= value < size()

View File

@ -47,7 +47,7 @@
#ifdef KOKKOS_ENABLE_CUDA
#include <Kokkos_Cuda.hpp>
#include <Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
namespace Kokkos {
@ -91,12 +91,12 @@ namespace Impl {
KOKKOS_INLINE_FUNCTION
int shfl(const int &val, const int& srcLane, const int& width ) {
return __shfl(val,srcLane,width);
return KOKKOS_IMPL_CUDA_SHFL(val,srcLane,width);
}
KOKKOS_INLINE_FUNCTION
float shfl(const float &val, const int& srcLane, const int& width ) {
return __shfl(val,srcLane,width);
return KOKKOS_IMPL_CUDA_SHFL(val,srcLane,width);
}
template<typename Scalar>
@ -105,7 +105,7 @@ namespace Impl {
) {
Scalar tmp1 = val;
float tmp = *reinterpret_cast<float*>(&tmp1);
tmp = __shfl(tmp,srcLane,width);
tmp = KOKKOS_IMPL_CUDA_SHFL(tmp,srcLane,width);
return *reinterpret_cast<Scalar*>(&tmp);
}
@ -113,8 +113,8 @@ namespace Impl {
double shfl(const double &val, const int& srcLane, const int& width) {
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl(lo,srcLane,width);
hi = __shfl(hi,srcLane,width);
lo = KOKKOS_IMPL_CUDA_SHFL(lo,srcLane,width);
hi = KOKKOS_IMPL_CUDA_SHFL(hi,srcLane,width);
return __hiloint2double(hi,lo);
}
@ -123,8 +123,8 @@ namespace Impl {
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) {
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
lo = __shfl(lo,srcLane,width);
hi = __shfl(hi,srcLane,width);
lo = KOKKOS_IMPL_CUDA_SHFL(lo,srcLane,width);
hi = KOKKOS_IMPL_CUDA_SHFL(hi,srcLane,width);
const double tmp = __hiloint2double(hi,lo);
return *(reinterpret_cast<const Scalar*>(&tmp));
}
@ -137,18 +137,18 @@ namespace Impl {
s_val = val;
for(int i = 0; i<s_val.n; i++)
r_val.fval[i] = __shfl(s_val.fval[i],srcLane,width);
r_val.fval[i] = KOKKOS_IMPL_CUDA_SHFL(s_val.fval[i],srcLane,width);
return r_val.value();
}
KOKKOS_INLINE_FUNCTION
int shfl_down(const int &val, const int& delta, const int& width) {
return __shfl_down(val,delta,width);
return KOKKOS_IMPL_CUDA_SHFL_DOWN(val,delta,width);
}
KOKKOS_INLINE_FUNCTION
float shfl_down(const float &val, const int& delta, const int& width) {
return __shfl_down(val,delta,width);
return KOKKOS_IMPL_CUDA_SHFL_DOWN(val,delta,width);
}
template<typename Scalar>
@ -156,7 +156,7 @@ namespace Impl {
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
Scalar tmp1 = val;
float tmp = *reinterpret_cast<float*>(&tmp1);
tmp = __shfl_down(tmp,delta,width);
tmp = KOKKOS_IMPL_CUDA_SHFL_DOWN(tmp,delta,width);
return *reinterpret_cast<Scalar*>(&tmp);
}
@ -164,8 +164,8 @@ namespace Impl {
double shfl_down(const double &val, const int& delta, const int& width) {
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_down(lo,delta,width);
hi = __shfl_down(hi,delta,width);
lo = KOKKOS_IMPL_CUDA_SHFL_DOWN(lo,delta,width);
hi = KOKKOS_IMPL_CUDA_SHFL_DOWN(hi,delta,width);
return __hiloint2double(hi,lo);
}
@ -174,8 +174,8 @@ namespace Impl {
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
lo = __shfl_down(lo,delta,width);
hi = __shfl_down(hi,delta,width);
lo = KOKKOS_IMPL_CUDA_SHFL_DOWN(lo,delta,width);
hi = KOKKOS_IMPL_CUDA_SHFL_DOWN(hi,delta,width);
const double tmp = __hiloint2double(hi,lo);
return *(reinterpret_cast<const Scalar*>(&tmp));
}
@ -188,18 +188,18 @@ namespace Impl {
s_val = val;
for(int i = 0; i<s_val.n; i++)
r_val.fval[i] = __shfl_down(s_val.fval[i],delta,width);
r_val.fval[i] = KOKKOS_IMPL_CUDA_SHFL_DOWN(s_val.fval[i],delta,width);
return r_val.value();
}
KOKKOS_INLINE_FUNCTION
int shfl_up(const int &val, const int& delta, const int& width ) {
return __shfl_up(val,delta,width);
return KOKKOS_IMPL_CUDA_SHFL_UP(val,delta,width);
}
KOKKOS_INLINE_FUNCTION
float shfl_up(const float &val, const int& delta, const int& width ) {
return __shfl_up(val,delta,width);
return KOKKOS_IMPL_CUDA_SHFL_UP(val,delta,width);
}
template<typename Scalar>
@ -207,7 +207,7 @@ namespace Impl {
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
Scalar tmp1 = val;
float tmp = *reinterpret_cast<float*>(&tmp1);
tmp = __shfl_up(tmp,delta,width);
tmp = KOKKOS_IMPL_CUDA_SHFL_UP(tmp,delta,width);
return *reinterpret_cast<Scalar*>(&tmp);
}
@ -215,8 +215,8 @@ namespace Impl {
double shfl_up(const double &val, const int& delta, const int& width ) {
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_up(lo,delta,width);
hi = __shfl_up(hi,delta,width);
lo = KOKKOS_IMPL_CUDA_SHFL_UP(lo,delta,width);
hi = KOKKOS_IMPL_CUDA_SHFL_UP(hi,delta,width);
return __hiloint2double(hi,lo);
}
@ -225,8 +225,8 @@ namespace Impl {
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
lo = __shfl_up(lo,delta,width);
hi = __shfl_up(hi,delta,width);
lo = KOKKOS_IMPL_CUDA_SHFL_UP(lo,delta,width);
hi = KOKKOS_IMPL_CUDA_SHFL_UP(hi,delta,width);
const double tmp = __hiloint2double(hi,lo);
return *(reinterpret_cast<const Scalar*>(&tmp));
}
@ -239,7 +239,7 @@ namespace Impl {
s_val = val;
for(int i = 0; i<s_val.n; i++)
r_val.fval[i] = __shfl_up(s_val.fval[i],delta,width);
r_val.fval[i] = KOKKOS_IMPL_CUDA_SHFL_UP(s_val.fval[i],delta,width);
return r_val.value();
}

View File

@ -0,0 +1,12 @@
#include<Kokkos_Macros.hpp>
// Wrappers for the warp-level vote and shuffle intrinsics, so that callers can
// use a single spelling regardless of the CUDA toolkit version.
#if ( CUDA_VERSION >= 9000 )
// CUDA 9.0 and newer: forward to the *_sync intrinsics, always passing a mask
// with all 32 bits set.
#define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot_sync(0xffffffff,x)
#define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl_sync(0xffffffff,x,y,z)
#define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up_sync(0xffffffff,x,y,z)
#define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down_sync(0xffffffff,x,y,z)
#else
// Older toolkits (or CUDA_VERSION undefined, which the preprocessor treats as
// 0): forward to the legacy mask-less intrinsics.
#define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot(x)
#define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl(x,y,z)
#define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up(x,y,z)
#define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down(x,y,z)
#endif

View File

@ -251,7 +251,7 @@
#endif
#endif
#if defined( __PGIC__ ) && !defined( __GNUC__ )
#if defined( __PGIC__ )
#define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__
#if ( 1540 > KOKKOS_COMPILER_PGI )
@ -268,7 +268,9 @@
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ( 1800 > KOKKOS_COMPILER_INTEL )
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
#if ( __INTEL_COMPILER > 1400 )
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
@ -511,5 +513,11 @@
#define KOKKOS_ENABLE_TASKDAG
#endif
// Flag builds that have the CUDA backend enabled and use a CUDA 9.0 or newer
// toolkit, so version-specific workarounds can key off a single macro.
#if defined ( KOKKOS_ENABLE_CUDA ) && ( 9000 <= CUDA_VERSION )
#define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND
#endif
#endif // #ifndef KOKKOS_MACROS_HPP

View File

@ -51,6 +51,27 @@
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_SharedAlloc.hpp>
namespace Kokkos {
namespace Impl {

/* Report a violation of the MemoryPool size constraints.
 *
 * The constraints between the arguments are:
 *   min_block_alloc_size <= max_block_alloc_size
 *   max_block_alloc_size <= min_superblock_size
 *   min_superblock_size  <= max_superblock_size
 *   min_superblock_size  <= min_total_alloc_size
 *   min_superblock_size  <= min_block_alloc_size *
 *                           max_block_per_superblock
 *
 * Only the declaration lives here; the definition is provided elsewhere.
 */
void memory_pool_bounds_verification( size_t min_block_alloc_size
                                    , size_t max_block_alloc_size
                                    , size_t min_superblock_size
                                    , size_t max_superblock_size
                                    , size_t max_block_per_superblock
                                    , size_t min_total_alloc_size
                                    );

} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
template< typename DeviceType >
@ -332,39 +353,23 @@ public:
//--------------------------------------------------
{
/* Enforce size constraints:
* min_block_alloc_size <= max_block_alloc_size
* max_block_alloc_size <= min_superblock_size
* min_superblock_size <= max_superblock_size
* min_superblock_size <= min_total_alloc_size
* min_superblock_size <= min_block_alloc_size *
* max_block_per_superblock
*/
/* Enforce size constraints:
* min_block_alloc_size <= max_block_alloc_size
* max_block_alloc_size <= min_superblock_size
* min_superblock_size <= max_superblock_size
* min_superblock_size <= min_total_alloc_size
* min_superblock_size <= min_block_alloc_size *
* max_block_per_superblock
*/
const size_t max_superblock =
min_block_alloc_size * max_block_per_superblock ;
if ( ( size_t(max_superblock_size) < min_superblock_size ) ||
( min_total_alloc_size < min_superblock_size ) ||
( max_superblock < min_superblock_size ) ||
( min_superblock_size < max_block_alloc_size ) ||
( max_block_alloc_size < min_block_alloc_size ) ) {
#if 1
printf( " MemoryPool min_block_alloc_size(%ld) max_block_alloc_size(%ld) min_superblock_size(%ld) min_total_alloc_size(%ld) ; max_superblock_size(%ld) max_block_per_superblock(%ld)\n"
, min_block_alloc_size
Kokkos::Impl::memory_pool_bounds_verification
( min_block_alloc_size
, max_block_alloc_size
, min_superblock_size
, max_superblock_size
, max_block_per_superblock
, min_total_alloc_size
, size_t(max_superblock_size)
, size_t(max_block_per_superblock)
);
#endif
Kokkos::abort("Kokkos MemoryPool size constraint violation");
}
}
//--------------------------------------------------
// Block and superblock size is power of two:

View File

@ -204,6 +204,7 @@ struct reduction_identity<double> {
KOKKOS_FORCEINLINE_FUNCTION constexpr static double min() {return DBL_MAX;}
};
#if !defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
template<>
struct reduction_identity<long double> {
KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum() {return static_cast<long double>(0.0);}
@ -211,6 +212,7 @@ struct reduction_identity<long double> {
KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max() {return -LDBL_MAX;}
KOKKOS_FORCEINLINE_FUNCTION constexpr static long double min() {return LDBL_MAX;}
};
#endif
}

View File

@ -78,7 +78,7 @@ struct pair
/// This calls the default constructors of T1 and T2. It won't
/// compile if those default constructors are not defined and
/// public.
KOKKOS_FORCEINLINE_FUNCTION constexpr
KOKKOS_FUNCTION_DEFAULTED constexpr
pair() = default ;
/// \brief Constructor that takes both elements of the pair.
@ -458,7 +458,7 @@ struct pair<T1,void>
first_type first;
enum { second = 0 };
KOKKOS_FORCEINLINE_FUNCTION constexpr
KOKKOS_FUNCTION_DEFAULTED constexpr
pair() = default ;
KOKKOS_FORCEINLINE_FUNCTION constexpr

View File

@ -241,7 +241,7 @@ void parallel_for( const std::string & str
std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
#endif
parallel_for(policy,functor,str);
::Kokkos::parallel_for(policy,functor,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
@ -487,7 +487,7 @@ void parallel_scan( const std::string& str
std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
#endif
parallel_scan(policy,functor,str);
::Kokkos::parallel_scan(policy,functor,str);
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();

View File

@ -0,0 +1,111 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOSP_PROFILE_SECTION_HPP
#define KOKKOSP_PROFILE_SECTION_HPP
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <string>
namespace Kokkos {
namespace Profiling {

/// \brief Named profiling section backed by the Kokkos profiling interface.
///
/// When KOKKOS_ENABLE_PROFILING is defined and profileLibraryLoaded() returns
/// true, the constructor calls createProfileSection(), start()/stop() call
/// startSection()/stopSection(), and the destructor calls
/// destroyProfileSection(). In every other case those member functions do
/// nothing and the section id stays 0.
///
/// NOTE(review): the class is copyable while the destructor calls
/// destroyProfileSection(secID); a copy would issue a second destroy for the
/// same id. Confirm whether copying should be disabled.
class ProfilingSection {
public:
  /// \param sectionName  name stored in the object and passed to
  ///                     createProfileSection().
  ProfilingSection(const std::string& sectionName) :
    secName(sectionName),
    // Always start from a defined id: previously secID stayed uninitialized
    // when profiling was compiled in but no profiling library was loaded.
    secID(0) {
#if defined( KOKKOS_ENABLE_PROFILING )
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      // presumably fills secID with the id assigned by the loaded tool
      Kokkos::Profiling::createProfileSection(secName, &secID);
    }
#endif
  }

  /// Forward to startSection(secID) if a profiling library is loaded.
  void start() {
#if defined( KOKKOS_ENABLE_PROFILING )
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      Kokkos::Profiling::startSection(secID);
    }
#endif
  }

  /// Forward to stopSection(secID) if a profiling library is loaded.
  void stop() {
#if defined( KOKKOS_ENABLE_PROFILING )
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      Kokkos::Profiling::stopSection(secID);
    }
#endif
  }

  /// Forward to destroyProfileSection(secID) if a profiling library is loaded.
  ~ProfilingSection() {
#if defined( KOKKOS_ENABLE_PROFILING )
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      Kokkos::Profiling::destroyProfileSection(secID);
    }
#endif
  }

  /// \return a copy of the name given at construction.
  std::string getName() const {
    return secName;
  }

  /// \return the stored section id (0 unless createProfileSection() set it).
  uint32_t getSectionID() const {
    return secID;
  }

protected:
  const std::string secName;
  uint32_t secID;
};

}
}
#endif

View File

@ -145,7 +145,7 @@ public:
unsigned use_cores_per_numa = 0 ,
bool allow_asynchronous_threadpool = false);
static int is_initialized();
static bool is_initialized();
/** \brief Return the maximum amount of concurrency. */
static int concurrency() {return 1;};
@ -424,11 +424,13 @@ private:
typedef typename Policy::work_tag WorkTag ;
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename Analysis::pointer_type pointer_type ;
typedef typename Analysis::reference_type reference_type ;
@ -488,7 +490,7 @@ public:
this-> template exec< WorkTag >( update );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::
final( ReducerConditional::select(m_functor , m_reducer) , ptr );
}
@ -675,12 +677,13 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef typename ReducerTypeFwd::value_type ValueType;
typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename Analysis::pointer_type pointer_type ;
typedef typename Analysis::reference_type reference_type ;
@ -735,7 +738,7 @@ public:
this-> exec( update );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::
final( ReducerConditional::select(m_functor , m_reducer) , ptr );
}
@ -878,8 +881,9 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename Analysis::pointer_type pointer_type ;
typedef typename Analysis::reference_type reference_type ;
@ -940,7 +944,7 @@ public:
this-> template exec< WorkTag >( data , update );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::
final( ReducerConditional::select(m_functor , m_reducer) , ptr );
}

View File

@ -5,51 +5,44 @@ endif
PREFIX ?= /usr/local/lib/kokkos
default: messages build-lib
echo "End Build"
default: build-lib
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
CXX ?= $(KOKKOS_PATH)/bin/nvcc_wrapper
else
CXX = g++
CXX ?= g++
endif
CXXFLAGS = -O3
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?=
include $(KOKKOS_PATH)/Makefile.kokkos
PWD = $(shell pwd)
KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists
include $(KOKKOS_PATH)/core/src/Makefile.generate_build_files
CONDITIONAL_COPIES =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
CONDITIONAL_COPIES += copy-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
CONDITIONAL_COPIES += copy-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
CONDITIONAL_COPIES += copy-qthreads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
CONDITIONAL_COPIES += copy-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
CONDITIONAL_COPIES += copy-rocm
endif
ifeq ($(KOKKOS_OS),CYGWIN)
COPY_FLAG = -u
endif
@ -66,104 +59,7 @@ else
KOKKOS_DEBUG_CMAKE = ON
endif
messages:
echo "Start Build"
build-makefile-kokkos:
rm -f Makefile.kokkos
echo "#Global Settings used to generate this library" >> Makefile.kokkos
echo "KOKKOS_PATH = $(PREFIX)" >> Makefile.kokkos
echo "KOKKOS_DEVICES = $(KOKKOS_DEVICES)" >> Makefile.kokkos
echo "KOKKOS_ARCH = $(KOKKOS_ARCH)" >> Makefile.kokkos
echo "KOKKOS_DEBUG = $(KOKKOS_DEBUG)" >> Makefile.kokkos
echo "KOKKOS_USE_TPLS = $(KOKKOS_USE_TPLS)" >> Makefile.kokkos
echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos
echo "KOKKOS_OPTIONS = $(KOKKOS_OPTIONS)" >> Makefile.kokkos
echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos
echo "CXX ?= $(CXX)" >> Makefile.kokkos
echo "NVCC_WRAPPER ?= $(PREFIX)/bin/nvcc_wrapper" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos
echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos
echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Variables used in application Makefiles" >> Makefile.kokkos
echo "KOKKOS_OS = $(KOKKOS_OS)" >> Makefile.kokkos
echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos
echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos
echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos
echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos
echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos
echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Internal settings which need to propagated for Kokkos examples" >> Makefile.kokkos
echo "KOKKOS_INTERNAL_USE_CUDA = ${KOKKOS_INTERNAL_USE_CUDA}" >> Makefile.kokkos
echo "KOKKOS_INTERNAL_USE_QTHREADS = ${KOKKOS_INTERNAL_USE_QTHREADS}" >> Makefile.kokkos
echo "KOKKOS_INTERNAL_USE_OPENMP = ${KOKKOS_INTERNAL_USE_OPENMP}" >> Makefile.kokkos
echo "KOKKOS_INTERNAL_USE_PTHREADS = ${KOKKOS_INTERNAL_USE_PTHREADS}" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Fake kokkos-clean target" >> Makefile.kokkos
echo "kokkos-clean:" >> Makefile.kokkos
echo "" >> Makefile.kokkos
sed \
-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
-e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' Makefile.kokkos \
> Makefile.kokkos.tmp
mv -f Makefile.kokkos.tmp Makefile.kokkos
build-cmake-kokkos:
rm -f kokkos.cmake
echo "#Global Settings used to generate this library" >> kokkos.cmake
echo "set(KOKKOS_PATH $(PREFIX) CACHE PATH \"Kokkos installation path\")" >> kokkos.cmake
echo "set(KOKKOS_DEVICES $(KOKKOS_DEVICES) CACHE STRING \"Kokkos devices list\")" >> kokkos.cmake
echo "set(KOKKOS_ARCH $(KOKKOS_ARCH) CACHE STRING \"Kokkos architecture flags\")" >> kokkos.cmake
echo "set(KOKKOS_DEBUG $(KOKKOS_DEBUG_CMAKE) CACHE BOOL \"Kokkos debug enabled ?)\")" >> kokkos.cmake
echo "set(KOKKOS_USE_TPLS $(KOKKOS_USE_TPLS) CACHE STRING \"Kokkos templates list\")" >> kokkos.cmake
echo "set(KOKKOS_CXX_STANDARD $(KOKKOS_CXX_STANDARD) CACHE STRING \"Kokkos C++ standard\")" >> kokkos.cmake
echo "set(KOKKOS_OPTIONS $(KOKKOS_OPTIONS) CACHE STRING \"Kokkos options\")" >> kokkos.cmake
echo "set(KOKKOS_CUDA_OPTIONS $(KOKKOS_CUDA_OPTIONS) CACHE STRING \"Kokkos Cuda options\")" >> kokkos.cmake
echo "if(NOT $ENV{CXX})" >> kokkos.cmake
echo ' message(WARNING "You are currently using compiler $${CMAKE_CXX_COMPILER} while Kokkos was built with $(CXX) ; make sure this is the behavior you intended to be.")' >> kokkos.cmake
echo "endif()" >> kokkos.cmake
echo "if(NOT DEFINED ENV{NVCC_WRAPPER})" >> kokkos.cmake
echo " set(NVCC_WRAPPER \"$(NVCC_WRAPPER)\" CACHE FILEPATH \"Path to command nvcc_wrapper\")" >> kokkos.cmake
echo "else()" >> kokkos.cmake
echo ' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")' >> kokkos.cmake
echo "endif()" >> kokkos.cmake
echo "" >> kokkos.cmake
echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> kokkos.cmake
echo "set(KOKKOS_HEADERS \"$(KOKKOS_HEADERS)\" CACHE STRING \"Kokkos headers list\")" >> kokkos.cmake
echo "set(KOKKOS_SRC \"$(KOKKOS_SRC)\" CACHE STRING \"Kokkos source list\")" >> kokkos.cmake
echo "" >> kokkos.cmake
echo "#Variables used in application Makefiles" >> kokkos.cmake
echo "set(KOKKOS_CPP_DEPENDS \"$(KOKKOS_CPP_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake
echo "set(KOKKOS_CXXFLAGS \"$(KOKKOS_CXXFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake
echo "set(KOKKOS_CPPFLAGS \"$(KOKKOS_CPPFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake
echo "set(KOKKOS_LINK_DEPENDS \"$(KOKKOS_LINK_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake
echo "set(KOKKOS_LIBS \"$(KOKKOS_LIBS)\" CACHE STRING \"\")" >> kokkos.cmake
echo "set(KOKKOS_LDFLAGS \"$(KOKKOS_LDFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake
echo "" >> kokkos.cmake
echo "#Internal settings which need to propagated for Kokkos examples" >> kokkos.cmake
echo "set(KOKKOS_INTERNAL_USE_CUDA \"${KOKKOS_INTERNAL_USE_CUDA}\" CACHE STRING \"\")" >> kokkos.cmake
echo "set(KOKKOS_INTERNAL_USE_OPENMP \"${KOKKOS_INTERNAL_USE_OPENMP}\" CACHE STRING \"\")" >> kokkos.cmake
echo "set(KOKKOS_INTERNAL_USE_PTHREADS \"${KOKKOS_INTERNAL_USE_PTHREADS}\" CACHE STRING \"\")" >> kokkos.cmake
echo "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)" >> kokkos.cmake
echo "" >> kokkos.cmake
sed \
-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
-e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' kokkos.cmake \
> kokkos.cmake.tmp
mv -f kokkos.cmake.tmp kokkos.cmake
build-lib: build-makefile-kokkos build-cmake-kokkos $(KOKKOS_LINK_DEPENDS)
build-lib: $(KOKKOS_LINK_DEPENDS)
mkdir:
mkdir -p $(PREFIX)
@ -188,14 +84,18 @@ copy-openmp: mkdir
mkdir -p $(PREFIX)/include/OpenMP
cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
install: mkdir $(CONDITIONAL_COPIES) build-lib
copy-rocm: mkdir
mkdir -p $(PREFIX)/include/ROCm
cp $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm
install: mkdir $(CONDITIONAL_COPIES) build-lib generate_build_settings
cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin
cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
cp $(COPY_FLAG) Makefile.kokkos $(PREFIX)
cp $(COPY_FLAG) kokkos.cmake $(PREFIX)
cp $(COPY_FLAG) $(KOKKOS_MAKEFILE) $(PREFIX)
cp $(COPY_FLAG) $(KOKKOS_CMAKEFILE) $(PREFIX)
cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib
cp $(COPY_FLAG) KokkosCore_config.h $(PREFIX)/include
cp $(COPY_FLAG) $(KOKKOS_CONFIG_HEADER) $(PREFIX)/include
clean: kokkos-clean
rm -f Makefile.kokkos
rm -f $(KOKKOS_MAKEFILE) $(KOKKOS_CMAKEFILE)

View File

@ -0,0 +1,100 @@
# This file is responsible for generating files which will be used
# by build system (make and cmake) in scenarios where the kokkos library
# gets installed before building the app
# These files are generated by this makefile
# Names of the two settings files written by generate_build_settings.
KOKKOS_MAKEFILE=Makefile.kokkos
KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake

# Translate KOKKOS_DEBUG into a CMake boolean.
# make compares strings literally, quotes included, so accept both the quoted
# form "no" and the bare form no (e.g. `make KOKKOS_DEBUG=no`, where the shell
# has already removed the quotes). Every other value, including an empty one,
# yields ON as before.
ifneq (,$(filter no "no",$(KOKKOS_DEBUG)))
  KOKKOS_DEBUG_CMAKE = OFF
else
  KOKKOS_DEBUG_CMAKE = ON
endif
# Functions for generating the makefile and the cmake file.
# Each function expands to a shell `echo ... >> file` command and is meant to
# be used from a recipe via $(call ...).
# When calling these routines, do not put a space after the comma,
# e.g., $(call kokkos_append_var,KOKKOS_PATH,$(PREFIX))

# Append the text $1 verbatim to the generated makefile / cmake file.
kokkos_append_makefile = echo $1 >> $(KOKKOS_MAKEFILE)
kokkos_append_cmakefile = echo $1 >> $(KOKKOS_CMAKEFILE)
# Append `set($1 $2)` to the cmake file; the setlist variant wraps $2 in
# double quotes.
kokkos_setvar_cmakefile = echo set\($1 $2\) >> $(KOKKOS_CMAKEFILE)
kokkos_setlist_cmakefile = echo set\($1 \"$2\"\) >> $(KOKKOS_CMAKEFILE)
# $1 is a variable NAME; $($(1)) is that variable's current value.
# Makefile output: `NAME = value` (appendvar) or `NAME ?= value` (appendvar2).
kokkos_appendvar_makefile = echo $1 = $($(1)) >> $(KOKKOS_MAKEFILE)
kokkos_appendvar2_makefile = echo $1 ?= $($(1)) >> $(KOKKOS_MAKEFILE)
# CMake output: `set(NAME value CACHE $2 FORCE)`; $2 supplies the cache type
# and docstring, e.g. 'STRING "Kokkos devices list"'.
kokkos_appendvar_cmakefile = echo set\($1 $($(1)) CACHE $2 FORCE\) >> $(KOKKOS_CMAKEFILE)
# Same as the appendvar pair, but the value is given explicitly as $2
# (and the cache type/docstring as $3) instead of being read from variable $1.
kokkos_appendval_makefile = echo $1 = $2 >> $(KOKKOS_MAKEFILE)
kokkos_appendval_cmakefile = echo set\($1 $2 CACHE $3 FORCE\) >> $(KOKKOS_CMAKEFILE)
# Convenience wrappers that write to BOTH files in one shell command line.
kokkos_append_string = $(call kokkos_append_makefile,$1); $(call kokkos_append_cmakefile,$1)
kokkos_append_var = $(call kokkos_appendvar_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2)
kokkos_append_var2 = $(call kokkos_appendvar2_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2)
kokkos_append_varval = $(call kokkos_appendval_makefile,$1,$2); $(call kokkos_appendval_cmakefile,$1,$2,$3)
# Regenerate $(KOKKOS_MAKEFILE) and $(KOKKOS_CMAKEFILE) from the current
# build configuration.
#
# Both files are removed first and then rebuilt line by line with the
# kokkos_append_* helpers. Afterwards sed rewrites the source-tree paths in
# $(KOKKOS_MAKEFILE) to their $(PREFIX) install locations (the cmake file is
# not passed through sed), and a final group of plain set() lines is appended
# to the cmake file.
#
# Fixes relative to the previous version:
#  * CXX is written with kokkos_append_var2; the former call named the
#    undefined function kokkos_appendvar2, which expands to nothing, so CXX
#    was silently missing from both files.
#  * The mark_as_advanced line has the comma after the function name; without
#    it $(call ...) looked up an undefined variable and emitted nothing.
generate_build_settings: $(KOKKOS_CONFIG_HEADER)
	@rm -f $(KOKKOS_MAKEFILE)
	@rm -f $(KOKKOS_CMAKEFILE)
	@$(call kokkos_append_string,"#Global Settings used to generate this library")
	@$(call kokkos_append_varval,KOKKOS_PATH,$(KOKKOS_INSTALL_PATH),'FILEPATH "Kokkos installation path"')
	@$(call kokkos_append_var,KOKKOS_DEVICES,'STRING "Kokkos devices list"')
	@$(call kokkos_append_var,KOKKOS_ARCH,'STRING "Kokkos architecture flags"')
	@$(call kokkos_appendvar_makefile,KOKKOS_DEBUG)
	@$(call kokkos_appendvar_cmakefile,KOKKOS_DEBUG_CMAKE,'BOOL "Kokkos debug enabled ?"')
	@$(call kokkos_append_var,KOKKOS_USE_TPLS,'STRING "Kokkos templates list"')
	@$(call kokkos_append_var,KOKKOS_CXX_STANDARD,'STRING "Kokkos C++ standard"')
	@$(call kokkos_append_var,KOKKOS_OPTIONS,'STRING "Kokkos options"')
	@$(call kokkos_append_var,KOKKOS_CUDA_OPTIONS,'STRING "Kokkos Cuda options"')
	@$(call kokkos_append_var2,CXX,'STRING "Kokkos C++ Compiler"')
	@$(call kokkos_append_cmakefile,"if(NOT DEFINED ENV{NVCC_WRAPPER})")
	@$(call kokkos_append_var2,NVCC_WRAPPER,'FILEPATH "Path to command nvcc_wrapper"')
	@$(call kokkos_append_cmakefile,"else()")
	@$(call kokkos_append_cmakefile,' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")')
	@$(call kokkos_append_cmakefile,"endif()")
	@$(call kokkos_append_string,"")
	@$(call kokkos_append_string,"#Source and Header files of Kokkos relative to KOKKOS_PATH")
	@$(call kokkos_append_var,KOKKOS_HEADERS,'STRING "Kokkos headers list"')
	@$(call kokkos_append_var,KOKKOS_HEADERS_IMPL,'STRING "Kokkos headers impl list"')
	@$(call kokkos_append_var,KOKKOS_HEADERS_CUDA,'STRING "Kokkos headers Cuda list"')
	@$(call kokkos_append_var,KOKKOS_HEADERS_OPENMP,'STRING "Kokkos headers OpenMP list"')
	@$(call kokkos_append_var,KOKKOS_HEADERS_ROCM,'STRING "Kokkos headers ROCm list"')
	@$(call kokkos_append_var,KOKKOS_HEADERS_THREADS,'STRING "Kokkos headers Threads list"')
	@$(call kokkos_append_var,KOKKOS_HEADERS_QTHREADS,'STRING "Kokkos headers QThreads list"')
	@$(call kokkos_append_var,KOKKOS_SRC,'STRING "Kokkos source list"')
	@$(call kokkos_append_string,"")
	@$(call kokkos_append_string,"#Variables used in application Makefiles")
	@$(call kokkos_append_var,KOKKOS_OS,'STRING ""') # This was not in original cmake gen
	@$(call kokkos_append_var,KOKKOS_CPP_DEPENDS,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_LINK_DEPENDS,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_CXXFLAGS,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_CPPFLAGS,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_LDFLAGS,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_LIBS,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_EXTRA_LIBS,'STRING ""')
	@$(call kokkos_append_string,"")
	@$(call kokkos_append_string,"#Internal settings which need to propagated for Kokkos examples")
	@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_CUDA,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_OPENMP,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_PTHREADS,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_ROCM,'STRING ""')
	@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_QTHREADS,'STRING ""') # Not in original cmake gen
	@$(call kokkos_append_cmakefile,"mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)")
	@$(call kokkos_append_makefile,"")
	@$(call kokkos_append_makefile,"#Fake kokkos-clean target")
	@$(call kokkos_append_makefile,"kokkos-clean:")
	@$(call kokkos_append_makefile,"")
	@sed \
		-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
		-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
		-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
		-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
		-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
		-e 's|= $(KOKKOS_CONFIG_HEADER)|= $(PREFIX)/include/$(KOKKOS_CONFIG_HEADER)|g' $(KOKKOS_MAKEFILE) \
		> $(KOKKOS_MAKEFILE).tmp
	@mv -f $(KOKKOS_MAKEFILE).tmp $(KOKKOS_MAKEFILE)
	@$(call kokkos_setvar_cmakefile,KOKKOS_CXX_FLAGS,$(KOKKOS_CXXFLAGS))
	@$(call kokkos_setvar_cmakefile,KOKKOS_CPP_FLAGS,$(KOKKOS_CPPFLAGS))
	@$(call kokkos_setvar_cmakefile,KOKKOS_LD_FLAGS,$(KOKKOS_LDFLAGS))
	@$(call kokkos_setlist_cmakefile,KOKKOS_LIBS_LIST,$(KOKKOS_LIBS))
	@$(call kokkos_setlist_cmakefile,KOKKOS_EXTRA_LIBS_LIST,$(KOKKOS_EXTRA_LIBS))

View File

@ -0,0 +1,28 @@
# Collect the header files that belong to the Kokkos packages.

# Public headers: core, then containers, then algorithms.
KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)

# Implementation headers: core, then containers.
KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)

# Backend headers are appended only when the matching backend flag equals 1.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
  KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
endif

ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
  KOKKOS_HEADERS_ROCM += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
endif

View File

@ -292,11 +292,12 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
// Static Assert WorkTag void if ReducerType not InvalidType
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
typedef typename Analysis::pointer_type pointer_type ;
typedef typename Analysis::reference_type reference_type ;
@ -393,7 +394,7 @@ public:
, m_instance->get_thread_data(i)->pool_reduce_local() );
}
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
if ( m_result_ptr ) {
const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) );
@ -463,11 +464,12 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef typename ReducerTypeFwd::value_type ValueType;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
typedef typename Analysis::pointer_type pointer_type ;
typedef typename Analysis::reference_type reference_type ;
@ -558,7 +560,7 @@ public:
, m_instance->get_thread_data(i)->pool_reduce_local() );
}
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
if ( m_result_ptr ) {
const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) );
@ -920,9 +922,10 @@ private:
, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ;
typedef typename Analysis::pointer_type pointer_type ;
typedef typename Analysis::reference_type reference_type ;
@ -1067,7 +1070,7 @@ public:
, m_instance->get_thread_data(i)->pool_reduce_local() );
}
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
if ( m_result_ptr ) {
const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) );

View File

@ -248,12 +248,13 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
// Static Assert WorkTag void if ReducerType not InvalidType
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ;
enum {HasJoin = ReduceFunctorHasJoin<FunctorType>::value };
enum {UseReducer = is_reducer_type<ReducerType>::value };
@ -620,10 +621,11 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;

View File

@ -150,11 +150,12 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType > ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, WorkTag, void >::type WorkTagFwd;
// Static Assert WorkTag void if ReducerType not InvalidType
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
@ -213,7 +214,7 @@ public:
const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result();
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , data );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , data );
if ( m_result_ptr ) {
const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );
@ -331,9 +332,10 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, WorkTag, void >::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
@ -394,7 +396,7 @@ public:
const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result();
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer), data );
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer), data );
if ( m_result_ptr ) {
const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );

View File

@ -102,11 +102,12 @@ void reduce_enqueue(
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, F, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, Tag, void >::type TagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , Tag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , Tag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , Tag > ValueJoin ;
typedef Kokkos::Impl::FunctorFinal< ReducerTypeFwd , Tag > ValueFinal ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , TagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , TagFwd > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , TagFwd > ValueJoin ;
typedef Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagFwd > ValueFinal ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;

View File

@ -50,7 +50,6 @@
#include <cstdio>
#include <utility>
#include <cstdalign>
#include <impl/Kokkos_Spinwait.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>

View File

@ -396,9 +396,10 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
@ -458,7 +459,7 @@ private:
( self.m_functor , range.begin() , range.end()
, ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
}
template<class Schedule>
@ -484,7 +485,7 @@ private:
work_index = exec.get_work_index();
}
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
}
public:
@ -564,11 +565,12 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef typename ReducerTypeFwd::value_type ValueType;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
@ -618,7 +620,7 @@ private:
( self.m_mdr_policy, self.m_functor , range.begin() , range.end()
, ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
}
template<class Schedule>
@ -644,7 +646,7 @@ private:
work_index = exec.get_work_index();
}
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
}
public:
@ -725,9 +727,10 @@ private:
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
typedef typename ReducerConditional::type ReducerTypeFwd;
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
@ -767,7 +770,7 @@ private:
( self.m_functor , Member( & exec , self.m_policy , self.m_shared )
, ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
}
public:

View File

@ -49,6 +49,10 @@
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP )
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP
#if defined(KOKKOS_ENABLE_CUDA)
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
@ -103,7 +107,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = __ballot(1);
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -115,7 +119,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
done = 1;
}
}
done_active = __ballot(done);
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
}
return return_val;
}

View File

@ -49,6 +49,10 @@
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP )
#define KOKKOS_ATOMIC_EXCHANGE_HPP
#if defined(KOKKOS_ENABLE_CUDA)
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
@ -126,7 +130,7 @@ T atomic_exchange( volatile T * const dest ,
#endif
int done = 0;
unsigned int active = __ballot(1);
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -137,7 +141,7 @@ T atomic_exchange( volatile T * const dest ,
done = 1;
}
}
done_active = __ballot(done);
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
}
return return_val;
}

View File

@ -49,6 +49,10 @@
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP )
#define KOKKOS_ATOMIC_FETCH_ADD_HPP
#if defined(KOKKOS_ENABLE_CUDA)
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
@ -139,7 +143,7 @@ T atomic_fetch_add( volatile T * const dest ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = __ballot(1);
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -151,7 +155,7 @@ T atomic_fetch_add( volatile T * const dest ,
done = 1;
}
}
done_active = __ballot(done);
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
}
return return_val;
}

View File

@ -49,6 +49,10 @@
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_SUB_HPP )
#define KOKKOS_ATOMIC_FETCH_SUB_HPP
#if defined(KOKKOS_ENABLE_CUDA)
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
#endif
namespace Kokkos {
//----------------------------------------------------------------------------
@ -117,7 +121,7 @@ T atomic_fetch_sub( volatile T * const dest ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = __ballot(1);
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -128,7 +132,7 @@ T atomic_fetch_sub( volatile T * const dest ,
done = 1;
}
}
done_active = __ballot(done);
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
}
return return_val;
}

View File

@ -46,6 +46,10 @@
#define KOKKOS_ATOMIC_GENERIC_HPP
#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ENABLE_CUDA)
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
#endif
// Combination operands to be used in an Compare and Exchange based atomic operation
namespace Kokkos {
namespace Impl {
@ -242,7 +246,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
// This is a way to (hopefully) avoid dead lock in a warp
T return_val;
int done = 0;
unsigned int active = __ballot(1);
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -253,7 +257,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
done=1;
}
}
done_active = __ballot(done);
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
}
return return_val;
#endif
@ -281,7 +285,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
T return_val;
// This is a way to (hopefully) avoid dead lock in a warp
int done = 0;
unsigned int active = __ballot(1);
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
unsigned int done_active = 0;
while (active!=done_active) {
if(!done) {
@ -292,7 +296,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
done=1;
}
}
done_active = __ballot(done);
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
}
return return_val;
#endif

View File

@ -48,6 +48,10 @@
#include <cstdint>
#include <climits>
#if defined( __HCC_ACCELERATOR__ )
#include <hc.hpp>
#endif
namespace Kokkos {
namespace Impl {

View File

@ -132,10 +132,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
// struct, you may remove this line of code.
(void) args;
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
Kokkos::Serial::initialize();
}
// Always initialize Serial if it is configure time enabled
Kokkos::Serial::initialize();
#endif
#if defined( KOKKOS_ENABLE_OPENMPTARGET )
@ -234,12 +232,8 @@ void finalize_internal( const bool all_spaces = false )
#endif
#if defined( KOKKOS_ENABLE_SERIAL )
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
all_spaces ) {
if(Kokkos::Serial::is_initialized())
Kokkos::Serial::finalize();
}
if(Kokkos::Serial::is_initialized())
Kokkos::Serial::finalize();
#endif
g_is_initialized = false;
@ -383,6 +377,7 @@ void initialize(int& narg, char* arg[])
}
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
ndevices = atoi(num1_only);
delete [] num1_only;
if( num2 != NULL ) {
if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) )
@ -439,7 +434,7 @@ void initialize(int& narg, char* arg[])
std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl;
std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl;
std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl;
std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl;
std::cout << "non prefixed ones, and the last occurrence of an argument overwrites prior" << std::endl;
std::cout << "settings." << std::endl;
std::cout << std::endl;
std::cout << "--kokkos-help : print this message" << std::endl;

View File

@ -0,0 +1,204 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
#include <impl/Kokkos_HostBarrier.hpp>
#include <impl/Kokkos_Spinwait.hpp>
namespace Kokkos { namespace Impl {
namespace {
// HostBarrier::HEADER expressed as a count of uint64_t words rather than bytes.
// Used below as the word offset from the start of the buffer to the first
// word of per-thread arrival bytes.
enum : int { HEADER_SIZE = HostBarrier::HEADER / sizeof(uint64_t) };
// Number of uint64_t words needed to hold one byte for each of the
// (nthreads - 1) threads other than rank 0, rounded up to a whole word.
inline constexpr int length64( const int nthreads ) noexcept
{
  // sizeof() yields an unsigned type, so the sum is evaluated unsigned and
  // then narrowed back to int; the cast only spells that conversion out.
  return static_cast<int>( ( nthreads - 1 + sizeof(uint64_t) - 1 ) / sizeof(uint64_t) );
}
} // namespace
// Cooperatively zero the shared barrier buffer before its first use.
//
// buffer : shared memory; the first RENDEZVOUS_HEADER bytes hold the header
//          word, followed by one byte per rank in [1, size).
// size   : number of participating ranks.
// rank   : rank of the calling thread.
//
// Ranks > 0 clear their own byte and then wait for the header word to read
// zero; rank 0 waits for every arrival word to read zero and then zeroes the
// header. spinwait_until_equal is declared in impl/Kokkos_Spinwait.hpp (not
// shown here) and is presumed to block until the location equals the value.
void rendezvous_initialize( volatile void * buffer
, const int size
, const int rank
) noexcept
{
Kokkos::store_fence();
// ensure that the buffer has been zero'd out
constexpr uint8_t zero8 = static_cast<uint8_t>(0);
constexpr uint64_t zero64 = static_cast<uint64_t>(0);
// The header is the first uint64_t of the buffer.
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
if (rank > 0) {
// Arrival bytes start RENDEZVOUS_HEADER bytes into the buffer; rank r owns byte r-1.
volatile uint8_t * bytes = reinterpret_cast<volatile uint8_t *>(buffer) + RENDEZVOUS_HEADER;
bytes[rank-1] = zero8;
// last thread is responsible for zeroing out the final bytes of the last uint64_t
if (rank == size-1) {
// tmp: arrival bytes occupying the last, partially filled word (0 if it is full).
const int tmp = (size-1) % sizeof(uint64_t);
// rem: padding bytes needed to complete that word.
const int rem = tmp ? sizeof(uint64_t) - tmp : 0;
for (int i=0; i<rem; ++i) {
bytes[rank+i] = zero8;
}
}
// Wait until the header word reads zero (rank 0 writes it below).
spinwait_until_equal( *header, zero64 );
}
else {
const int n = length64(size);
volatile uint64_t * buff = reinterpret_cast<volatile uint64_t *>(buffer) + HEADER_SIZE;
// wait for other threads to finish initializing
for (int i=0; i<n; ++i) {
spinwait_until_equal( buff[i], zero64 );
}
// release the waiting threads
*header = zero64;
Kokkos::store_fence();
}
Kokkos::load_fence();
}
// Arrive at the barrier and wait.
//
// buffer      : shared barrier buffer (header word followed by arrival bytes).
// step        : per-thread step counter, advanced in place by this call.
// size        : number of participating ranks.
// rank        : rank of the calling thread.
// active_wait : for ranks > 0, true selects spinwait_until_equal and false
//               selects yield_until_equal while waiting for the release.
//               Rank 0 always uses spinwait_until_equal.
//
// Returns true when size == 1 or when rank == 0, false otherwise. Ranks > 0
// return only after the header word equals the release value for this step;
// rank 0 returns once every arrival byte holds this step's byte value.
bool rendezvous( volatile void * buffer
, uint64_t & step
, const int size
, const int rank
, bool active_wait
) noexcept
{
// Force all outstanding stores from this thread to retire before continuing
Kokkos::store_fence();
// Advance the step, skipping any value whose low byte would be zero, so that
// byte_value (and therefore spin_value) below is never 0.
step = static_cast<uint8_t>(step + 1u)
? step + 1u
: step + 2u
;
// if size == 1, it is incorrect for rank 0 to check the tail value of the buffer
// this optimization prevents a potential read of uninitialized memory
if ( size == 1 ) { return true; }
const uint8_t byte_value = static_cast<uint8_t>(step);
// The release value is byte_value shifted left by (byte_value & 7) bits, so
// its position within the header word changes from step to step;
// this prevents threads from overtaking the master thread
const uint64_t spin_value = static_cast<uint64_t>(byte_value) << (byte_value&7);
if ( rank > 0 ) {
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
volatile uint8_t * bytes = reinterpret_cast<volatile uint8_t *>(buffer) + RENDEZVOUS_HEADER;
// Signal arrival: rank r owns arrival byte r-1.
bytes[ rank-1 ] = byte_value;
// Wait for the header word to take this step's release value.
if ( active_wait ) {
spinwait_until_equal( *header, spin_value );
}
else {
yield_until_equal( *header, spin_value );
}
}
else { // rank 0
volatile uint64_t * buff = reinterpret_cast<volatile uint64_t *>(buffer) + HEADER_SIZE;
const int n = length64(size);
// comp: byte_value replicated into all eight bytes of a word, i.e. the
// expected content of a fully populated arrival word.
uint64_t comp = byte_value;
comp = comp | (comp << 8);
comp = comp | (comp << 16);
comp = comp | (comp << 32);
// rem: number of arrival bytes in the last word when it is only partly used.
const int rem = (size-1) % sizeof(uint64_t);
// Build the expected last word in native byte order: the first rem bytes
// hold byte_value, the rest stay zero.
// NOTE(review): reading tmp.value after writing tmp.array relies on union
// type punning, which common compilers support but ISO C++ does not
// guarantee -- confirm this is acceptable for all supported toolchains.
union {
volatile uint64_t value;
volatile uint8_t array[sizeof(uint64_t)];
} tmp{};
for (int i=0; i<rem; ++i) {
tmp.array[i] = byte_value;
}
const uint64_t tail = rem ? tmp.value : comp;
// Wait for all full words, then for the (possibly partial) last word.
for (int i=0; i<n-1; ++i) {
spinwait_until_equal( buff[i], comp );
}
spinwait_until_equal( buff[n-1], tail );
}
// Force all outstanding stores from other threads to retire before allowing
// this thread to continue. This forces correctness on systems with out-of-order
// memory (Power and ARM)
Kokkos::load_fence();
return rank == 0;
}
// Publish the release value for `step` into the header word at the start of
// `buffer`.
void rendezvous_release( volatile void * buffer
                       , const uint64_t step
                       ) noexcept
{
  // Only the low byte of the step takes part in the encoding.
  const uint8_t  generation   = static_cast<uint8_t>( step );
  const int      bit_shift    = generation & 7;
  const uint64_t release_word = static_cast<uint64_t>( generation ) << bit_shift;

  // All earlier stores of this thread must retire before the release becomes
  // visible; this matters on weakly ordered memory systems (Power and ARM).
  Kokkos::store_fence();

  *reinterpret_cast<volatile uint64_t *>( buffer ) = release_word;

  Kokkos::memory_fence();
}
}} // namespace Kokkos::Impl

View File

@ -0,0 +1,146 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HOST_BARRIER_HPP
#define KOKKOS_HOST_BARRIER_HPP
#include <cstddef>
#include <cstdint>
namespace Kokkos { namespace Impl {
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Layout constants, in bytes, of the shared rendezvous buffer: the header
// occupies one full alignment unit at the start of the buffer.
enum : int {
  RENDEZVOUS_ALIGNMENT = 128,
  RENDEZVOUS_HEADER    = RENDEZVOUS_ALIGNMENT
};

// Bytes required for a rendezvous buffer shared by `nthreads` threads:
// the header plus (nthreads - 1) bytes rounded up to a whole multiple of
// RENDEZVOUS_ALIGNMENT.
inline constexpr int rendezvous_buffer_size( const int nthreads ) noexcept
{
  return RENDEZVOUS_HEADER
       + RENDEZVOUS_ALIGNMENT
         * ( ( nthreads - 1 + RENDEZVOUS_ALIGNMENT - 1 ) / RENDEZVOUS_ALIGNMENT );
}
// Zero the shared barrier buffer. Every rank in [0, size) is expected to call
// this: ranks > 0 clear their own slot and wait for the header to read zero,
// rank 0 waits for all slots to read zero and then zeroes the header.
void rendezvous_initialize( volatile void * buffer
, const int size
, const int rank
) noexcept;
// Arrive at the barrier. `step` is advanced in place. Returns true for rank 0
// (and whenever size == 1), false for all other ranks. `active_wait` chooses
// between spinning (true) and yielding (false) while ranks > 0 wait for the
// release. Intended use:
//   if ( rendezvous(...) ) { /* rank 0 only */ rendezvous_release(...); }
bool rendezvous( volatile void * buffer
, uint64_t & step
, const int size
, const int rank
, bool active_wait = true
) noexcept;
// Write the release value derived from `step` into the header word of
// `buffer`, which is the value ranks > 0 wait for inside rendezvous().
void rendezvous_release( volatile void * buffer
, const uint64_t step
) noexcept;
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Per-thread handle onto a shared rendezvous buffer. It bundles the buffer
// pointer, team size, rank, wait policy and step counter that the free
// rendezvous functions take as separate arguments. Non-copyable, non-movable.
class HostBarrier
{
public:

  enum : int { ALIGNMENT = RENDEZVOUS_ALIGNMENT };
  enum : int { HEADER    = ALIGNMENT };

  // ACTIVE is forwarded to Kokkos::Impl::rendezvous as active_wait == true,
  // PASSIVE as active_wait == false.
  enum Policy : int { ACTIVE, PASSIVE };

  // Bytes of shared buffer needed for `nthreads` participants.
  static constexpr int buffer_size( const int nthreads ) noexcept
  {
    return rendezvous_buffer_size( nthreads );
  }

  // Records the arguments and immediately takes part in
  // rendezvous_initialize() on the shared buffer.
  HostBarrier( volatile void * arg_buffer
             , int             arg_size
             , int             arg_rank
             , Policy          arg_policy
             ) noexcept
    : m_buffer( arg_buffer )
    , m_size( arg_size )
    , m_rank( arg_rank )
    , m_policy( arg_policy )
    , m_step( 0 )
  {
    rendezvous_initialize( m_buffer, m_size, m_rank );
  }

  // Arrive at the barrier; the return value is that of
  // Kokkos::Impl::rendezvous. Advances the internal step counter.
  bool rendezvous() const noexcept
  {
    const bool spin = ( ACTIVE == m_policy );
    return Kokkos::Impl::rendezvous( m_buffer, m_step, m_size, m_rank, spin );
  }

  // Publish the release value for the current step.
  void rendezvous_release() const noexcept
  {
    Kokkos::Impl::rendezvous_release( m_buffer, m_step );
  }

private:

  HostBarrier( const HostBarrier & ) = delete;
  HostBarrier( HostBarrier && ) = delete;
  HostBarrier & operator=( const HostBarrier & ) = delete;
  HostBarrier & operator=( HostBarrier && ) = delete;

  volatile void *  m_buffer ;
  const int        m_size ;
  const int        m_rank ;
  const Policy     m_policy ;
  mutable uint64_t m_step ;   // mutable: advanced by the const rendezvous()
};
}} // namespace Kokkos::Impl
#endif // KOKKOS_HOST_BARRIER_HPP

View File

@ -206,158 +206,6 @@ void HostThreadTeamData::disband_team()
m_team_rendezvous_step = 0 ;
}
//----------------------------------------------------------------------------
/* pattern for rendezvous
*
* if ( rendezvous() ) {
* ... all other threads are still in team_rendezvous() ...
* rendezvous_release();
* ... all other threads are released from team_rendezvous() ...
* }
*/
// Byte-per-thread rendezvous over a shared int64_t buffer.
//
// buffer          : shared synchronization array.
// rendezvous_step : in/out; replaced with the step value used by this call,
//                   computed as ( rendezvous_step % 12 ) + 1.
// size            : number of participating threads.
// rank            : rank of the calling thread.
//
// Returns 1 for rank 0 and 0 for every other rank. Non-zero ranks return only
// after the release word for this step equals the step value; rank 0 returns
// after the bytes it waits on hold the step value.
int HostThreadTeamData::rendezvous( int64_t * const buffer
, int & rendezvous_step
, int const size
, int const rank ) noexcept
{
enum : int { shift_byte = 3 };
enum : int { size_byte = ( 01 << shift_byte ) }; // == 8
enum : int { mask_byte = size_byte - 1 };
enum : int { shift_mem_cycle = 2 };
enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4
enum : int { mask_mem_cycle = size_mem_cycle - 1 };
// Cycle step values: 1 <= step <= size_val_cycle
// An odd multiple of memory cycle so that when a memory location
// is reused it has a different value.
// Must be representable within a single byte: size_val_cycle < 16
enum : int { size_val_cycle = 3 * size_mem_cycle };
// Requires:
// Called by rank = [ 0 .. size )
// buffer aligned to int64_t[4]
// A sequence of rendezvous uses four cycled locations in memory
// and non-equal cycled synchronization values to
// 1) prevent rendezvous from overtaking one another and
// 2) give each spin wait location an int64_t[4] span
// so that it has its own cache line.
const int step = ( rendezvous_step % size_val_cycle ) + 1 ;
rendezvous_step = step ;
// The leading int64_t[4] span is for thread 0 to write
// and all other threads to read spin-wait.
// sync_offset is the index into this array for this step.
const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle ;
if ( rank ) {
const int group_begin = rank << shift_byte ; // == rank * size_byte
if ( group_begin < size ) {
// This thread waits for threads
// [ group_begin .. group_begin + 8 )
// [ rank*8 .. rank*8 + 8 )
// to write to their designated bytes.
const int end = group_begin + size_byte < size
? size_byte : size - group_begin ;
// Expected word: the first `end` bytes (in memory order) hold the step value.
int64_t value = 0 ;
for ( int i = 0 ; i < end ; ++i ) {
((int8_t*) & value )[i] = int8_t( step );
}
// Do not REMOVE this store fence!!!
// Makes stuff hang on GCC with more than 8 threads
store_fence();
spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
, value );
}
{
// This thread sets its designated byte.
// ( rank % size_byte ) +
// ( ( rank / size_byte ) * size_byte * size_mem_cycle ) +
// ( sync_offset * size_byte )
int offset = ( rank & mask_byte )
+ ( ( rank & ~mask_byte ) << shift_mem_cycle )
+ ( sync_offset << shift_byte );
// Switch designated byte if running on big endian machine
// (endianness is detected at run time by inspecting the first byte of a
// 16-bit value holding 1).
volatile uint16_t value = 1;
volatile uint8_t* byte = (uint8_t*) &value;
volatile bool is_big_endian = (!(byte[0] == 1));
if (is_big_endian) {
// Mirror the byte position within its 8-byte word.
int remainder = ((offset) % 8);
int base = offset - remainder;
int shift = 7 - remainder;
offset = base + shift;
}
// All of this thread's previous memory stores must be complete before
// this thread stores the step value at this thread's designated byte
// in the shared synchronization array.
Kokkos::memory_fence();
((volatile int8_t*) buffer)[ offset ] = int8_t( step );
// Memory fence to push the previous store out
Kokkos::memory_fence();
}
// Wait for thread 0 to release all other threads
spinwait_until_equal( buffer[ step & mask_mem_cycle ] , int64_t(step) );
}
else {
// Thread 0 waits for threads [1..7]
// to write to their designated bytes.
const int end = size_byte < size ? 8 : size ;
// Byte 0 (thread 0's own slot) is left at zero in the expected word.
int64_t value = 0 ;
for ( int i = 1 ; i < end ; ++i ) {
((int8_t *) & value)[i] = int8_t( step );
}
spinwait_until_equal( buffer[ sync_offset ], value );
}
return rank ? 0 : 1 ;
}
// Release the threads waiting in rendezvous() for step `rendezvous_step`.
//
// Requires:
//   Called after team_rendezvous
//   Called only by true == team_rendezvous(root)
void HostThreadTeamData::
rendezvous_release( int64_t * const buffer
                  , int const rendezvous_step ) noexcept
{
  enum : int { mem_cycle_bits = 2 };
  enum : int { mem_cycle_len  = ( 01 << mem_cycle_bits ) }; // == 4
  enum : int { mem_cycle_mask = mem_cycle_len - 1 };

  // The release word is one of the leading int64_t entries, selected by the
  // low bits of the step.
  volatile int64_t * const release_slot =
    ((volatile int64_t*) buffer) + ( rendezvous_step & mem_cycle_mask );

  // Memory fence to be sure all previous writes are complete:
  Kokkos::memory_fence();

  *release_slot = int64_t( rendezvous_step );

  // Memory fence to push the store out
  Kokkos::memory_fence();
}
//----------------------------------------------------------------------------
int HostThreadTeamData::get_work_stealing() noexcept

View File

@ -50,7 +50,7 @@
#include <Kokkos_ExecPolicy.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <impl/Kokkos_FunctorAnalysis.hpp>
#include <impl/Kokkos_Rendezvous.hpp>
#include <impl/Kokkos_HostBarrier.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -113,50 +113,29 @@ private:
int m_league_size ;
int m_work_chunk ;
int m_steal_rank ; // work stealing rank
int mutable m_team_rendezvous_step ;
uint64_t mutable m_pool_rendezvous_step ;
uint64_t mutable m_team_rendezvous_step ;
HostThreadTeamData * team_member( int r ) const noexcept
{ return ((HostThreadTeamData**)(m_pool_scratch+m_pool_members))[m_team_base+r]; }
// Rendezvous pattern:
// if ( rendezvous(root) ) {
// ... only root thread here while all others wait ...
// rendezvous_release();
// }
// else {
// ... all other threads release here ...
// }
//
// Requires: buffer[ ( max_threads / 8 ) * 4 + 4 ]; 0 == max_threads % 8
//
static
int rendezvous( int64_t * const buffer
, int & rendezvous_step
, int const size
, int const rank ) noexcept ;
static
void rendezvous_release( int64_t * const buffer
, int const rendezvous_step ) noexcept ;
public:
inline
int team_rendezvous( int const root ) const noexcept
{
return 1 == m_team_size ? 1 :
HostThreadTeamData::
rendezvous( m_team_scratch + m_team_rendezvous
, m_team_rendezvous_step
, m_team_size
, ( m_team_rank + m_team_size - root ) % m_team_size );
, ( m_team_rank + m_team_size - root ) % m_team_size
);
}
inline
int team_rendezvous() const noexcept
{
return 1 == m_team_size ? 1 :
HostThreadTeamData::
rendezvous( m_team_scratch + m_team_rendezvous
, m_team_rendezvous_step
, m_team_size
@ -167,7 +146,6 @@ public:
void team_rendezvous_release() const noexcept
{
if ( 1 < m_team_size ) {
HostThreadTeamData::
rendezvous_release( m_team_scratch + m_team_rendezvous
, m_team_rendezvous_step );
}
@ -176,30 +154,30 @@ public:
inline
int pool_rendezvous() const noexcept
{
static constexpr int yield_wait =
static constexpr bool active_wait =
#if defined( KOKKOS_COMPILER_IBM )
// If running on IBM POWER architecture the global
// level rendzvous should immediately yield when
// waiting for other threads in the pool to arrive.
1
false
#else
0
true
#endif
;
return 1 == m_pool_size ? 1 :
Kokkos::Impl::
rendezvous( m_pool_scratch + m_pool_rendezvous
, m_pool_rendezvous_step
, m_pool_size
, m_pool_rank
, yield_wait );
, active_wait
);
}
inline
void pool_rendezvous_release() const noexcept
{
if ( 1 < m_pool_size ) {
Kokkos::Impl::
rendezvous_release( m_pool_scratch + m_pool_rendezvous );
rendezvous_release( m_pool_scratch + m_pool_rendezvous, m_pool_rendezvous_step );
}
}
@ -225,6 +203,7 @@ public:
, m_league_size(1)
, m_work_chunk(0)
, m_steal_rank(0)
, m_pool_rendezvous_step(0)
, m_team_rendezvous_step(0)
{}

View File

@ -0,0 +1,125 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <ostream>
#include <sstream>
#include <impl/Kokkos_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/* Verify size constraints:
* min_block_alloc_size <= max_block_alloc_size
* max_block_alloc_size <= min_superblock_size
* min_superblock_size <= max_superblock_size
* min_superblock_size <= min_total_alloc_size
* min_superblock_size <= min_block_alloc_size *
* max_block_per_superblock
*/
// Checks the MemoryPool sizing parameters against each other.
// Returns normally when every constraint holds; otherwise builds one
// message naming every violated constraint (with the offending values)
// and hands it to Kokkos::Impl::throw_runtime_exception.
void memory_pool_bounds_verification
  ( size_t min_block_alloc_size
  , size_t max_block_alloc_size
  , size_t min_superblock_size
  , size_t max_superblock_size
  , size_t max_block_per_superblock
  , size_t min_total_alloc_size
  )
{
  // Space covered by max_block_per_superblock blocks of the minimum size.
  const size_t max_superblock =
    min_block_alloc_size * max_block_per_superblock ;

  // Evaluate every constraint exactly once; true means "violated".
  const bool superblock_range_bad = max_superblock_size  < min_superblock_size ;
  const bool total_alloc_bad      = min_total_alloc_size < min_superblock_size ;
  const bool block_capacity_bad   = max_superblock       < min_superblock_size ;
  const bool superblock_too_small = min_superblock_size  < max_block_alloc_size ;
  const bool block_range_bad      = max_block_alloc_size < min_block_alloc_size ;

  const bool any_violation =
    superblock_range_bad || total_alloc_bad || block_capacity_bad ||
    superblock_too_small || block_range_bad ;

  if ( ! any_violation ) { return ; }

  std::ostringstream msg ;

  msg << "Kokkos::MemoryPool size constraint violation" ;

  if ( superblock_range_bad ) {
    msg << " : max_superblock_size(" << max_superblock_size
        << ") < min_superblock_size(" << min_superblock_size << ")" ;
  }

  if ( total_alloc_bad ) {
    msg << " : min_total_alloc_size(" << min_total_alloc_size
        << ") < min_superblock_size(" << min_superblock_size << ")" ;
  }

  if ( block_capacity_bad ) {
    msg << " : max_superblock(" << max_superblock
        << ") < min_superblock_size(" << min_superblock_size << ")" ;
  }

  if ( superblock_too_small ) {
    msg << " : min_superblock_size(" << min_superblock_size
        << ") < max_block_alloc_size(" << max_block_alloc_size << ")" ;
  }

  if ( block_range_bad ) {
    msg << " : max_block_alloc_size(" << max_block_alloc_size
        << ") < min_block_alloc_size(" << min_block_alloc_size << ")" ;
  }

  Kokkos::Impl::throw_runtime_exception( msg.str() );
}
}
}

View File

@ -45,7 +45,9 @@
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
#define KOKKOS_MEMORY_FENCE_HPP
#if !defined(_OPENMP)
#include <atomic>
#endif
namespace Kokkos {
@ -54,8 +56,10 @@ namespace Kokkos {
KOKKOS_FORCEINLINE_FUNCTION
void memory_fence()
{
#if defined( __CUDA_ARCH__ )
#if defined( __CUDA_ARCH__ )
__threadfence();
#elif defined( _OPENMP )
#pragma omp flush
#else
std::atomic_thread_fence( std::memory_order_seq_cst );
#endif
@ -71,6 +75,8 @@ void store_fence()
{
#if defined( __CUDA_ARCH__ )
__threadfence();
#elif defined( _OPENMP )
#pragma omp flush
#else
std::atomic_thread_fence( std::memory_order_seq_cst );
#endif
@ -86,6 +92,8 @@ void load_fence()
{
#if defined( __CUDA_ARCH__ )
__threadfence();
#elif defined( _OPENMP )
#pragma omp flush
#else
std::atomic_thread_fence( std::memory_order_seq_cst );
#endif

View File

@ -69,6 +69,13 @@ static deallocateDataFunction deallocateDataCallee = nullptr;
static beginDeepCopyFunction beginDeepCopyCallee = nullptr;
static endDeepCopyFunction endDeepCopyCallee = nullptr;
static createProfileSectionFunction createSectionCallee = nullptr;
static startProfileSectionFunction startSectionCallee = nullptr;
static stopProfileSectionFunction stopSectionCallee = nullptr;
static destroyProfileSectionFunction destroySectionCallee = nullptr;
static profileEventFunction profileEventCallee = nullptr;
SpaceHandle::SpaceHandle(const char* space_name) {
strncpy(name,space_name,64);
}
@ -162,6 +169,37 @@ void endDeepCopy() {
}
}
// Forwards a section-creation request to the loaded profiling tool.
// Does nothing when no "kokkosp_create_profile_section" hook is registered;
// otherwise the tool receives the section name and the secID pointer.
void createProfileSection(const std::string& sectionName, uint32_t* secID) {
  if(createSectionCallee == nullptr) {
    return;
  }

  createSectionCallee(sectionName.c_str(), secID);
}
// Notifies the loaded profiling tool that section secID has started.
// Does nothing when no start-section hook is registered.
void startSection(const uint32_t secID) {
  if(startSectionCallee == nullptr) {
    return;
  }

  startSectionCallee(secID);
}
// Notifies the loaded profiling tool that section secID has stopped.
// Does nothing when no stop-section hook is registered.
void stopSection(const uint32_t secID) {
  if(stopSectionCallee == nullptr) {
    return;
  }

  stopSectionCallee(secID);
}
// Asks the loaded profiling tool to destroy section secID.
// Does nothing when no destroy-section hook is registered.
void destroyProfileSection(const uint32_t secID) {
  if(destroySectionCallee == nullptr) {
    return;
  }

  destroySectionCallee(secID);
}
// Reports a named event to the loaded profiling tool.
// Does nothing when no profile-event hook is registered.
void markEvent(const std::string& eventName) {
  if(profileEventCallee == nullptr) {
    return;
  }

  profileEventCallee(eventName.c_str());
}
void initialize() {
// Make sure initialization happens only once
@ -230,7 +268,18 @@ void initialize() {
beginDeepCopyCallee = *((beginDeepCopyFunction*) &p13);
auto p14 = dlsym(firstProfileLibrary, "kokkosp_end_deep_copy");
endDeepCopyCallee = *((endDeepCopyFunction*) &p14);
auto p15 = dlsym(firstProfileLibrary, "kokkosp_create_profile_section");
createSectionCallee = *((createProfileSectionFunction*) &p15);
auto p16 = dlsym(firstProfileLibrary, "kokkosp_start_profile_section");
startSectionCallee = *((startProfileSectionFunction*) &p16);
auto p17 = dlsym(firstProfileLibrary, "kokkosp_stop_profile_section");
stopSectionCallee = *((stopProfileSectionFunction*) &p17);
auto p18 = dlsym(firstProfileLibrary, "kokkosp_destroy_profile_section");
destroySectionCallee = *((destroyProfileSectionFunction*) &p18);
auto p19 = dlsym(firstProfileLibrary, "kokkosp_profile_event");
profileEventCallee = *((profileEventFunction*) &p19);
}
}
@ -274,6 +323,13 @@ void finalize() {
beginDeepCopyCallee = nullptr;
endDeepCopyCallee = nullptr;
createSectionCallee = nullptr;
startSectionCallee = nullptr;
stopSectionCallee = nullptr;
destroySectionCallee = nullptr;
profileEventCallee = nullptr;
}
}
}

View File

@ -45,6 +45,7 @@
#define KOKKOSP_INTERFACE_HPP
#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ENABLE_PROFILING)
#include <cstddef>
@ -57,7 +58,7 @@
#include <iostream>
#include <cstdlib>
#define KOKKOSP_INTERFACE_VERSION 20150628
#define KOKKOSP_INTERFACE_VERSION 20171029
namespace Kokkos {
namespace Profiling {
@ -81,6 +82,13 @@ typedef void (*popFunction)();
typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
typedef void (*createProfileSectionFunction)(const char*, uint32_t*);
typedef void (*startProfileSectionFunction)(const uint32_t);
typedef void (*stopProfileSectionFunction)(const uint32_t);
typedef void (*destroyProfileSectionFunction)(const uint32_t);
typedef void (*profileEventFunction)(const char*);
typedef void (*beginDeepCopyFunction)(
SpaceHandle, const char*, const void*,
SpaceHandle, const char*, const void*,
@ -99,6 +107,13 @@ void endParallelReduce(const uint64_t kernelID);
void pushRegion(const std::string& kName);
void popRegion();
// Profiling sections: each call forwards to the corresponding hook of the
// loaded profiling tool and is a no-op when that hook is not registered.
// createProfileSection passes secID through to the tool's create hook.
void createProfileSection(const std::string& sectionName, uint32_t* secID);
void startSection(const uint32_t secID);
void stopSection(const uint32_t secID);
void destroyProfileSection(const uint32_t secID);
// Reports a named event to the profiling tool. The name is taken by const
// reference so that this declaration matches the definition of markEvent;
// a pointer parameter here would declare an overload that is never defined.
void markEvent(const std::string& evName);
void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);
void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);

View File

@ -1,219 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
#include <impl/Kokkos_Rendezvous.hpp>
#include <impl/Kokkos_Spinwait.hpp>
namespace Kokkos { namespace Impl {
//----------------------------------------------------------------------------
/* pattern for rendezvous
*
* if ( rendezvous() ) {
* ... all other threads are still in team_rendezvous() ...
* rendezvous_release();
* ... all other threads are released from team_rendezvous() ...
* }
*
* Parameters:
*   buffer : shared synchronization array (see layout notes in the body)
*   size   : number of participating threads, ranks are [ 0 .. size )
*   rank   : rank of the calling thread
*   slow   : non-zero -> wait with yield_until_equal,
*            zero     -> wait with spinwait_until_equal
*
* Returns 1 to rank 0 and 0 to every other rank.
* A non-zero rank does not return until it observes the release word
* for the current step (written by rendezvous_release()).
*/
int rendezvous( volatile int64_t * const buffer
, int const size
, int const rank
, int const slow
) noexcept
{
enum : int { shift_byte = 3 };
enum : int { size_byte = ( 01 << shift_byte ) }; // == 8
enum : int { mask_byte = size_byte - 1 };
enum : int { shift_mem_cycle = 2 };
enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4
enum : int { mask_mem_cycle = size_mem_cycle - 1 };
// Cycle step values: 1 <= step <= size_val_cycle
// An odd multiple of memory cycle so that when a memory location
// is reused it has a different value.
// Must be representable within a single byte: size_val_cycle < 16
enum : int { size_val_cycle = 3 * size_mem_cycle };
// Requires:
// Called by rank = [ 0 .. size )
// buffer aligned to int64_t[4]
// A sequence of rendezvous uses four cycled locations in memory
// and non-equal cycled synchronization values to
// 1) prevent rendezvous from overtaking one another and
// 2) give each spin wait location an int64_t[4] span
// so that it has its own cache line.
// The step for this rendezvous is derived from buffer[0], which is only
// advanced by rendezvous_release().
const int64_t step = (buffer[0] % size_val_cycle ) + 1 ;
// The leading int64_t[4] span is for thread 0 to write
// and all other threads to read spin-wait.
// sync_offset is the index into this array for this step.
const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle + size_mem_cycle ;
if ( rank ) {
const int group_begin = rank << shift_byte ; // == rank * size_byte
if ( group_begin < size ) {
// This thread waits for threads
// [ group_begin .. group_begin + 8 )
// [ rank*8 .. rank*8 + 8 )
// to write to their designated bytes.
// 'end' is the number of ranks in that group that actually exist.
const int end = group_begin + size_byte < size
? size_byte : size - group_begin ;
// Expected word: 'step' replicated into each of the low 'end' bytes.
int64_t value = 0;
for ( int i = 0 ; i < end ; ++i ) {
value |= step << (i * size_byte );
}
store_fence(); // This should not be needed but fixes #742
if ( slow ) {
yield_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
, value );
}
else {
spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
, value );
}
}
{
// This thread sets its designated byte.
// ( rank % size_byte ) +
// ( ( rank / size_byte ) * size_byte * size_mem_cycle ) +
// ( sync_offset * size_byte )
int offset = ( rank & mask_byte )
+ ( ( rank & ~mask_byte ) << shift_mem_cycle )
+ ( sync_offset << shift_byte );
// Switch designated byte if running on big endian machine
// (endianness is detected at run time by inspecting the first byte of a
// 16-bit value holding 1; on big endian the byte index within the
// 8-byte word is mirrored: remainder -> 7 - remainder).
volatile uint16_t value = 1;
volatile uint8_t* byte = (uint8_t*) &value;
volatile bool is_big_endian = (!(byte[0] == 1));
if (is_big_endian) {
int remainder = ((offset) % 8);
int base = offset - remainder;
int shift = 7 - remainder;
offset = base + shift;
}
// All of this thread's previous memory stores must be complete before
// this thread stores the step value at this thread's designated byte
// in the shared synchronization array.
Kokkos::memory_fence();
((volatile int8_t*) buffer)[ offset ] = int8_t( step );
// Memory fence to push the previous store out
Kokkos::memory_fence();
}
// Wait for thread 0 to release all other threads
// (the release word for this step is written by rendezvous_release()).
if ( slow ) {
yield_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) );
}
else {
spinwait_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) );
}
}
else {
// Thread 0 waits for threads [1..7]
// to write to their designated bytes.
// Byte 0 is skipped (loop starts at 1): rank 0 never writes a byte itself.
const int end = size_byte < size ? 8 : size ;
int64_t value = 0;
for ( int i = 1 ; i < end ; ++i ) {
value |= step << (i * size_byte );
}
if ( slow ) {
yield_until_equal( buffer[ sync_offset ], value );
}
else {
spinwait_until_equal( buffer[ sync_offset ], value );
}
}
// Only rank 0 is told it is the root (and must call rendezvous_release()).
return rank ? 0 : 1 ;
}
// Releases the threads waiting inside rendezvous().
// Advances the step counter stored in buffer[0] and then publishes the new
// step value in the release word that the waiting threads are polling.
void rendezvous_release( volatile int64_t * const buffer ) noexcept
{
enum : int { shift_mem_cycle = 2 };
enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4
enum : int { mask_mem_cycle = size_mem_cycle - 1 };
enum : int { size_val_cycle = 3 * size_mem_cycle };
// Requires:
// Called after team_rendezvous
// Called only by true == team_rendezvous(root)
// update step
// (same formula as in rendezvous(), so 'step' equals the value the
// waiting threads computed before buffer[0] is overwritten below)
const int64_t step = (buffer[0] % size_val_cycle ) + 1;
buffer[0] = step;
// Memory fence to be sure all previous writes are complete:
Kokkos::memory_fence();
// Release word index: one of the four slots in [ size_mem_cycle .. 2*size_mem_cycle )
buffer[ (step & mask_mem_cycle) + size_mem_cycle ] = step;
// Memory fence to push the store out
Kokkos::memory_fence();
}
}} // namespace Kokkos::Impl

View File

@ -1,87 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_RENDEZVOUS_HPP
#define KOKKOS_IMPL_RENDEZVOUS_HPP
#include <cstdint>
namespace Kokkos { namespace Impl {
// Number of int64_t entries a rendezvous buffer needs for max_members
// threads: four entries per started group of eight members, plus eight
// leading entries.
inline
constexpr int rendezvous_buffer_size( int max_members ) noexcept
{
  return 4 * ( ( max_members + 7 ) / 8 ) + 8 ;
}
/** \brief Thread pool rendezvous
*
* Rendezvous pattern:
* if ( rendezvous(root) ) {
* ... only root thread here while all others wait ...
* rendezvous_release();
* }
* else {
* ... all other threads release here ...
* }
*
* Requires: buffer[ rendezvous_buffer_size( max_threads ) ];
*
* When slow != 0 the expectation is thread arrival will be
* slow so the threads that arrive early should quickly yield
* their core to the runtime thus possibly allowing the late
* arriving threads to have more resources
* (e.g., power and clock frequency).
*
* \param buffer shared synchronization buffer, see size requirement above
* \param size   number of participating threads
* \param rank   rank of the calling thread within [ 0 .. size )
* \param slow   non-zero selects yielding waits (defaults to 0)
* \return non-zero for the single thread that must call rendezvous_release()
*/
int rendezvous( volatile int64_t * const buffer
, int const size
, int const rank
, int const slow = 0 ) noexcept ;
/** \brief Release the threads waiting in rendezvous().
*
* To be called by the thread for which rendezvous() returned non-zero,
* passing the same buffer.
*/
void rendezvous_release( volatile int64_t * const buffer ) noexcept ;
}} // namespace Kokkos::Impl
#endif // KOKKOS_IMPL_RENDEZVOUS_HPP

View File

@ -60,6 +60,8 @@ namespace {
HostThreadTeamData g_serial_thread_team_data ;
bool g_serial_is_initialized = false;
}
// Resize thread team data scratch memory
@ -136,9 +138,9 @@ HostThreadTeamData * serial_get_thread_team_data()
namespace Kokkos {
int Serial::is_initialized()
bool Serial::is_initialized()
{
return 1 ;
return Impl::g_serial_is_initialized ;
}
void Serial::initialize( unsigned threads_count
@ -158,6 +160,8 @@ void Serial::initialize( unsigned threads_count
#if defined(KOKKOS_ENABLE_PROFILING)
Kokkos::Profiling::initialize();
#endif
Impl::g_serial_is_initialized = true;
}
void Serial::finalize()
@ -177,6 +181,8 @@ void Serial::finalize()
#if defined(KOKKOS_ENABLE_PROFILING)
Kokkos::Profiling::finalize();
#endif
Impl::g_serial_is_initialized = false;
}
const char* Serial::name() { return "Serial"; }

View File

@ -47,6 +47,7 @@
#include <cstddef>
#include <cstdint>
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_BitOps.hpp>
#include <string>
#include <type_traits>
@ -435,21 +436,12 @@ struct power_of_two<1,true>
/** \brief If power of two then return power,
* otherwise return ~0u.
*/
static KOKKOS_FORCEINLINE_FUNCTION
KOKKOS_FORCEINLINE_FUNCTION
unsigned power_of_two_if_valid( const unsigned N )
{
unsigned p = ~0u ;
if ( N && ! ( N & ( N - 1 ) ) ) {
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA )
p = __ffs(N) - 1 ;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
p = __builtin_ffs(N) - 1 ;
#elif defined( __INTEL_COMPILER )
p = _bit_scan_forward(N);
#else
p = 0 ;
for ( unsigned j = 1 ; ! ( N & j ) ; j <<= 1 ) { ++p ; }
#endif
if ( is_integral_power_of_two ( N ) ) {
p = bit_scan_forward ( N ) ;
}
return p ;
}

View File

@ -144,9 +144,9 @@ public:
//----------------------------------------
KOKKOS_FUNCTION_DEFAULTED ~ViewOffset() = default ;
KOKKOS_INLINE_FUNCTION ViewOffset() = default ;
KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default ;
KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default ;
KOKKOS_FUNCTION_DEFAULTED ViewOffset() = default ;
KOKKOS_FUNCTION_DEFAULTED ViewOffset( const ViewOffset & ) = default ;
KOKKOS_FUNCTION_DEFAULTED ViewOffset & operator = ( const ViewOffset & ) = default ;
template< unsigned TrivialScalarSize >
KOKKOS_INLINE_FUNCTION

View File

@ -2,7 +2,16 @@
# Add test-only library for gtest to be reused by all the subpackages
#
# Outside of Trilinos the unit tests link the Kokkos library explicitly:
# "kokkos" by default, "kokkoscore" when KOKKOS_SEPARATE_LIBS is set.
# Inside Trilinos TEST_LINK_TARGETS is not set here, so
# ${TEST_LINK_TARGETS} adds nothing to the TESTONLYLIBS lists below.
IF(NOT KOKKOS_HAS_TRILINOS)
  SET(TEST_LINK_TARGETS kokkos)
  IF(KOKKOS_SEPARATE_LIBS)
    SET(TEST_LINK_TARGETS kokkoscore)
  ENDIF()
ENDIF()
SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0")
INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
TRIBITS_ADD_LIBRARY(
@ -63,7 +72,7 @@ IF(Kokkos_ENABLE_Serial)
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -111,7 +120,7 @@ IF(Kokkos_ENABLE_Pthread)
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -160,7 +169,7 @@ IF(Kokkos_ENABLE_OpenMP)
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -194,7 +203,7 @@ IF(Kokkos_ENABLE_Qthreads)
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -251,10 +260,11 @@ IF(Kokkos_ENABLE_Cuda)
cuda/TestCuda_ViewOfClass.cpp
cuda/TestCuda_Crs.cpp
cuda/TestCuda_WorkGraph.cpp
cuda/TestCuda_UniqueToken.cpp
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
ENDIF()
@ -271,7 +281,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
foreach(INITTESTS_NUM RANGE 1 16)
@ -281,7 +291,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)
endforeach(INITTESTS_NUM)
@ -291,5 +301,5 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
)

View File

@ -27,7 +27,8 @@ endif
CXXFLAGS = -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
LDFLAGS ?=
override LDFLAGS += -lpthread
include $(KOKKOS_PATH)/Makefile.kokkos
@ -329,7 +330,7 @@ KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HWLOC
KokkosCore_UnitTest_AllocationTracker: $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $( $(KOKKOS_LDFLAGS) $(LDFLAGS)LIB) -o KokkosCore_UnitTest_AllocationTracker
$(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(LIB) -o KokkosCore_UnitTest_AllocationTracker
KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_DEFAULT) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Default

View File

@ -475,6 +475,8 @@ public:
namespace Test {
struct ReducerTag {};
template< class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace >
struct TestReducers {
struct SumFunctor {
@ -590,6 +592,118 @@ struct TestReducers {
}
};
// Sum functor whose call operator is selected by the ReducerTag work tag:
// accumulates values( idx ) into the running total.
struct SumFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    update += values( idx );
  }
};
// Product functor whose call operator is selected by the ReducerTag work
// tag: multiplies the running product by values( idx ).
struct ProdFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    update *= values( idx );
  }
};
// Minimum functor whose call operator is selected by the ReducerTag work
// tag: keeps the smaller of the running minimum and values( idx ).
struct MinFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    if ( values( idx ) < update ) {
      update = values( idx );
    }
  }
};
// Maximum functor whose call operator is selected by the ReducerTag work
// tag: keeps the larger of the running maximum and values( idx ).
struct MaxFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    if ( values( idx ) > update ) {
      update = values( idx );
    }
  }
};
// Minimum-with-location functor whose call operator is selected by the
// ReducerTag work tag: records value and index when values( idx ) is
// strictly smaller than the current minimum.
struct MinLocFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type & update ) const {
    if ( ! ( values( idx ) < update.val ) ) {
      return;
    }

    update.val = values( idx );
    update.loc = idx;
  }
};
// Maximum-with-location functor whose call operator is selected by the
// ReducerTag work tag: records value and index when values( idx ) is
// strictly larger than the current maximum.
struct MaxLocFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type & update ) const {
    if ( ! ( values( idx ) > update.val ) ) {
      return;
    }

    update.val = values( idx );
    update.loc = idx;
  }
};
// Combined min/max-with-location functor whose call operator is selected by
// the ReducerTag work tag. The maximum is examined first, then the minimum;
// each side is updated only on a strict improvement.
struct MinMaxLocFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type & update ) const {
    const bool new_max = values( idx ) > update.max_val;
    if ( new_max ) {
      update.max_val = values( idx );
      update.max_loc = idx;
    }

    const bool new_min = values( idx ) < update.min_val;
    if ( new_min ) {
      update.min_val = values( idx );
      update.min_loc = idx;
    }
  }
};
// Bitwise-AND functor whose call operator is selected by the ReducerTag
// work tag: folds values( idx ) into the running result with '&'.
struct BAndFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    update = update & values( idx );
  }
};
// Bitwise-OR functor whose call operator is selected by the ReducerTag
// work tag: folds values( idx ) into the running result with '|'.
struct BOrFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    update = update | values( idx );
  }
};
// Logical-AND functor whose call operator is selected by the ReducerTag
// work tag: folds values( idx ) into the running result with '&&'.
struct LAndFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    update = update && values( idx );
  }
};
// Logical-OR functor whose call operator is selected by the ReducerTag
// work tag: folds values( idx ) into the running result with '||'.
struct LOrFunctorTag {
  Kokkos::View< const Scalar*, ExecSpace > values;

  KOKKOS_INLINE_FUNCTION
  void operator()( const ReducerTag, const int & idx, Scalar & update ) const {
    update = update || values( idx );
  }
};
static void test_sum( int N ) {
Kokkos::View< Scalar*, ExecSpace > values( "Values", N );
auto h_values = Kokkos::create_mirror_view( values );
@ -603,13 +717,19 @@ struct TestReducers {
SumFunctor f;
f.values = values;
SumFunctorTag f_tag;
f_tag.values = values;
Scalar init = 0;
{
Scalar sum_scalar = init;
Kokkos::Experimental::Sum< Scalar > reducer_scalar( sum_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( sum_scalar, reference_sum );
sum_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( sum_scalar, reference_sum );
Scalar sum_scalar_view = reducer_scalar.reference();
@ -643,13 +763,19 @@ struct TestReducers {
ProdFunctor f;
f.values = values;
ProdFunctorTag f_tag;
f_tag.values = values;
Scalar init = 1;
{
Scalar prod_scalar = init;
Kokkos::Experimental::Prod< Scalar > reducer_scalar( prod_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( prod_scalar, reference_prod );
prod_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( prod_scalar, reference_prod );
Scalar prod_scalar_view = reducer_scalar.reference();
@ -684,13 +810,19 @@ struct TestReducers {
MinFunctor f;
f.values = values;
MinFunctorTag f_tag;
f_tag.values = values;
Scalar init = std::numeric_limits< Scalar >::max();
{
Scalar min_scalar = init;
Kokkos::Experimental::Min< Scalar > reducer_scalar( min_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( min_scalar, reference_min );
min_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( min_scalar, reference_min );
Scalar min_scalar_view = reducer_scalar.reference();
@ -725,13 +857,19 @@ struct TestReducers {
MaxFunctor f;
f.values = values;
MaxFunctorTag f_tag;
f_tag.values = values;
Scalar init = std::numeric_limits< Scalar >::min();
{
Scalar max_scalar = init;
Kokkos::Experimental::Max< Scalar > reducer_scalar( max_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( max_scalar, reference_max );
max_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( max_scalar, reference_max );
Scalar max_scalar_view = reducer_scalar.reference();
@ -776,12 +914,19 @@ struct TestReducers {
MinLocFunctor f;
f.values = values;
MinLocFunctorTag f_tag;
f_tag.values = values;
{
value_type min_scalar;
Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar( min_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( min_scalar.val, reference_min );
ASSERT_EQ( min_scalar.loc, reference_loc );
min_scalar = value_type();
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( min_scalar.val, reference_min );
ASSERT_EQ( min_scalar.loc, reference_loc );
@ -829,12 +974,19 @@ struct TestReducers {
MaxLocFunctor f;
f.values = values;
MaxLocFunctorTag f_tag;
f_tag.values = values;
{
value_type max_scalar;
Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar( max_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( max_scalar.val, reference_max );
ASSERT_EQ( max_scalar.loc, reference_loc );
max_scalar = value_type();
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( max_scalar.val, reference_max );
ASSERT_EQ( max_scalar.loc, reference_loc );
@ -898,12 +1050,35 @@ struct TestReducers {
MinMaxLocFunctor f;
f.values = values;
MinMaxLocFunctorTag f_tag;
f_tag.values = values;
{
value_type minmax_scalar;
Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar( minmax_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( minmax_scalar.min_val, reference_min );
for ( int i = 0; i < N; i++ ) {
if ( ( i == minmax_scalar.min_loc ) && ( h_values( i ) == reference_min ) ) {
reference_minloc = i;
}
}
ASSERT_EQ( minmax_scalar.min_loc, reference_minloc );
ASSERT_EQ( minmax_scalar.max_val, reference_max );
for ( int i = 0; i < N; i++ ) {
if ( ( i == minmax_scalar.max_loc ) && ( h_values( i ) == reference_max ) ) {
reference_maxloc = i;
}
}
ASSERT_EQ( minmax_scalar.max_loc, reference_maxloc );
minmax_scalar = value_type();
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( minmax_scalar.min_val, reference_min );
for ( int i = 0; i < N; i++ ) {
@ -962,14 +1137,21 @@ struct TestReducers {
BAndFunctor f;
f.values = values;
BAndFunctorTag f_tag;
f_tag.values = values;
Scalar init = Scalar() | ( ~Scalar() );
{
Scalar band_scalar = init;
Kokkos::Experimental::BAnd< Scalar > reducer_scalar( band_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( band_scalar, reference_band );
band_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( band_scalar, reference_band );
Scalar band_scalar_view = reducer_scalar.reference();
ASSERT_EQ( band_scalar_view, reference_band );
@ -1002,13 +1184,19 @@ struct TestReducers {
BOrFunctor f;
f.values = values;
BOrFunctorTag f_tag;
f_tag.values = values;
Scalar init = Scalar() & ( ~Scalar() );
{
Scalar bor_scalar = init;
Kokkos::Experimental::BOr< Scalar > reducer_scalar( bor_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( bor_scalar, reference_bor );
bor_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( bor_scalar, reference_bor );
Scalar bor_scalar_view = reducer_scalar.reference();
@ -1042,13 +1230,19 @@ struct TestReducers {
LAndFunctor f;
f.values = values;
LAndFunctorTag f_tag;
f_tag.values = values;
Scalar init = 1;
{
Scalar land_scalar = init;
Kokkos::Experimental::LAnd< Scalar > reducer_scalar( land_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( land_scalar, reference_land );
land_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( land_scalar, reference_land );
Scalar land_scalar_view = reducer_scalar.reference();
@ -1082,13 +1276,19 @@ struct TestReducers {
LOrFunctor f;
f.values = values;
LOrFunctorTag f_tag;
f_tag.values = values;
Scalar init = 0;
{
Scalar lor_scalar = init;
Kokkos::Experimental::LOr< Scalar > reducer_scalar( lor_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
ASSERT_EQ( lor_scalar, reference_lor );
lor_scalar = init;
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
ASSERT_EQ( lor_scalar, reference_lor );
Scalar lor_scalar_view = reducer_scalar.reference();

View File

@ -46,6 +46,7 @@
#include <impl/Kokkos_Timer.hpp>
#include <iostream>
#include <cstdlib>
#include <cstdint>
namespace TestTeamVector {
@ -840,7 +841,8 @@ public:
const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols;
if ( int64_t(solution) != int64_t(result) ) {
printf( " TestTripleNestedReduce failed solution(%ld) != result(%ld), nrows(%d) ncols(%d) league_size(%d) team_size(%d)\n"
printf( " TestTripleNestedReduce failed solution(%" PRId64 ") != result(%" PRId64 "),"
" nrows(%" PRId32 ") ncols(%" PRId32 ") league_size(%" PRId32 ") team_size(%" PRId32 ")\n"
, int64_t(solution)
, int64_t(result)
, int32_t(nrows)

View File

@ -79,14 +79,18 @@ struct TestViewMappingSubview
typedef Kokkos::View< int***[13][14], Kokkos::LayoutLeft, ExecSpace > DLT;
typedef Kokkos::Subview< DLT, range, int, int, int, int > DLS1;
#if !defined(KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND)
static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout, Kokkos::LayoutLeft >::value
, "Subview layout error for rank 1 subview of left-most range of LayoutLeft" );
#endif
typedef Kokkos::View< int***[13][14], Kokkos::LayoutRight, ExecSpace > DRT;
typedef Kokkos::Subview< DRT, int, int, int, int, range > DRS1;
#if !defined(KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND)
static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout, Kokkos::LayoutRight >::value
, "Subview layout error for rank 1 subview of right-most range of LayoutRight" );
#endif
AT Aa;
AS Ab;

View File

@ -0,0 +1,52 @@
# Settings and helper macros for the configuration check: the `test` target
# generates Makefile.kokkos and KokkosCore_config.h for every architecture /
# device pairing listed below and checks each result with testmake.sh and
# diffconfig.sh.
KOKKOS_PATH = ../..
# See $(KOKKOS_PATH)/Makefile.kokkos and $(KOKKOS_PATH)/generate_makefile.bash
# NOTE: make keeps the double quotes as part of these values; the recipes
# that iterate over the lists rely on that when they quote the expansion again.
KOKKOS_ARCH_OPTIONS="None AMDAVX ARMv80 ARMv81 ARMv8-ThunderX \
BGQ Power7 Power8 Power9 \
WSM SNB HSW BDW SKX KNC KNL \
Kepler Kepler30 Kepler32 Kepler35 Kepler37 \
Maxwell Maxwell50 Maxwell52 Maxwell53 Pascal60 Pascal61"
# Single-architecture override (disabled):
#KOKKOS_ARCH_OPTIONS="AMDAVX"
KOKKOS_DEVICE_OPTIONS="Cuda ROCm OpenMP Pthread Serial Qthreads"
# Single-device override (disabled):
#KOKKOS_DEVICE_OPTIONS="Cuda"
# Configure paths to enable environment query in Makefile.kokkos to work
ROCM_HCC_PATH="config"
CXX="./config/cxx"
# Command prefix: runs whatever follows it with CXX and ROCM_HCC_PATH set and
# with ./config prepended to PATH.  $$PATH is left for the shell to expand.
ipath=env CXX=$(CXX) env PATH=./config:$$PATH env ROCM_HCC_PATH=$(ROCM_HCC_PATH)
# Defined in core/src/Makefile -- this should be consistent
KOKKOS_MAKEFILE=Makefile.kokkos
KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake
# Defined in Makefile.kokkos -- this should be consistent
KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp
KOKKOS_CONFIG_HEADER=KokkosCore_config.h
# A quoted literal '#' for use in the echo commands below; the backslash stops
# make from treating it as the start of a comment.
d='\#'
# diff => 0 is no difference. if => 0 is false
# Macros for $(call ...): run the helper script on $1 (testmake also passes
# $2 and $3) and print "OK # <name>" when it outputs exactly 'Passed',
# otherwise "not OK # <name>".
# NOTE(review): both scripts are invoked by bare name, so the shell resolves
# them through PATH -- confirm they are reachable when these macros run.
testmake=if test "`testmake.sh $1 $2 $3`" = 'Passed'; then echo OK $d $1; else echo not OK $d $1; fi
testconf=if test "`diffconfig.sh $1`" = 'Passed'; then echo OK $d $1; else echo not OK $d $1; fi
# Generate the Kokkos makefile and config header for every architecture /
# device pairing, store them under config/tmpstore with an "<arch>_<device>_"
# prefix, and print "OK" / "not OK" for each stored file.
#
# The intermediate tmp file and the generated CMake settings file are simply
# deleted: testing them is unnecessary here.
#
# The option variables carry their own double quotes, so "$(VAR)" expands to
# ""a b c"" and the shell still splits the list into separate words.
#
# Declared phony so that a file or directory named "test" can never make the
# target look up to date.
.PHONY: test
test:
	@for karch in "$(KOKKOS_ARCH_OPTIONS)"; do \
	  for device in "$(KOKKOS_DEVICE_OPTIONS)"; do \
	    $(ipath) KOKKOS_DEVICES=$$device KOKKOS_ARCH=$$karch make -e -f ../src/Makefile build-makefile-cmake-kokkos; \
	    rm -f $(KOKKOS_INTERNAL_CONFIG_TMP) $(KOKKOS_CMAKEFILE); \
	    prfx="$$karch"_"$$device"_; \
	    newmake="$$prfx"$(KOKKOS_MAKEFILE); \
	    newconf="$$prfx"$(KOKKOS_CONFIG_HEADER); \
	    mv $(KOKKOS_MAKEFILE) config/tmpstore/$$newmake; \
	    mv $(KOKKOS_CONFIG_HEADER) config/tmpstore/$$newconf; \
	    $(call testmake,$$newmake,$$karch,$$device); \
	    $(call testconf,$$newconf); \
	  done; \
	done
# Configure the CMake flavour of the configuration check inside
# config/cmaketest and run the tests it registers.
#
# The steps are chained with && so that a failed `cd` or a failed configure
# stops the recipe instead of running the remaining commands in the wrong
# directory or against a stale build.
# Declared phony because the target never produces a file of that name.
.PHONY: test-cmake
test-cmake:
	@cd config/cmaketest && \
	cmake . && \
	make test

View File

@ -0,0 +1,2 @@
#!/bin/sh
# Emit the fixed option string "--foo --bar" followed by a newline.
printf '%s\n' "--foo --bar"

View File

@ -0,0 +1,5 @@
#!/bin/sh
# Print a canned Apple LLVM (clang) version banner on stdout.
# Every line has to be an `echo "..."` command: writing `echo="..."` only
# assigns a shell variable named "echo" and prints nothing at all.
echo "Apple LLVM version 8.1.0 (clang-802.0.42)"
echo "Target: x86_64-apple-darwin16.7.0"
echo "Thread model: posix"
echo "InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin"

View File

@ -0,0 +1,80 @@
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
project(Kokkos CXX)
enable_testing()

# Resolve the directories this script works with, relative to its own
# location: the test directory two levels up and the source root four
# levels up.
get_filename_component(KOKKOS_TESTDIR "${CMAKE_SOURCE_DIR}/../.." REALPATH)
get_filename_component(KOKKOS_SRCDIR "${CMAKE_SOURCE_DIR}/../../../.." REALPATH)
set(KOKKOS_SRC_PATH "${KOKKOS_SRCDIR}")
set(KOKKOS_PATH "${KOKKOS_SRC_PATH}")

# Point CXX at the config/cxx script inside the test directory.
set(CXX "${KOKKOS_TESTDIR}/config/cxx")

# Output file names; these have to stay in sync with core/src/Makefile.
set(KOKKOS_MAKEFILE Makefile.kokkos)
set(KOKKOS_CMAKEFILE kokkos_generated_settings.cmake)

# Output file names; these have to stay in sync with Makefile.kokkos.
set(KOKKOS_INTERNAL_CONFIG_TMP KokkosCore_config.tmp)
set(KOKKOS_CONFIG_HEADER KokkosCore_config.h)

# Start with verbose output switched off.
set(KOKKOS_CMAKE_VERBOSE False)
# Pull in the Kokkos option processing once up front.
# (KOKKOS_DEVICES_LIST used below is presumably defined by this include.)
include("${KOKKOS_SRCDIR}/cmake/kokkos_options.cmake")

# Clean slate for testing: switch every listed device off by clearing its
# upper-cased KOKKOS_ENABLE_<DEVICE> variable.
foreach(KOKKOS_DEV ${KOKKOS_DEVICES_LIST})
  string(TOUPPER "${KOKKOS_DEV}" KOKKOS_DEVUC)
  set("KOKKOS_ENABLE_${KOKKOS_DEVUC}" OFF)
endforeach()
#TEST set(KOKKOS_HOST_ARCH_LIST ARMv80)
#TEST set(KOKKOS_DEVICES_LIST Cuda)
#set(KOKKOS_HOST_ARCH_LIST AMDAVX)
#set(KOKKOS_DEVICES_LIST Cuda)
# For every host architecture / device pairing:
#   1. enable just that device and re-run the Kokkos option processing,
#   2. generate the settings through core/src/Makefile (at configure time),
#   3. rename the generated CMake settings file and config header with an
#      "<arch>_<device>_" prefix,
#   4. register one CTest check per renamed file; a check passes when the
#      helper script prints "Passed".
foreach(KOKKOS_HOST_ARCH ${KOKKOS_HOST_ARCH_LIST})
  foreach(KOKKOS_DEV ${KOKKOS_DEVICES_LIST})
    string(TOUPPER ${KOKKOS_DEV} KOKKOS_DEVUC)
    set(KOKKOS_ENABLE_${KOKKOS_DEVUC} On)

    set(KOKKOS_CMAKE_VERBOSE True)
    include(${KOKKOS_SRCDIR}/cmake/kokkos_options.cmake)
    list(APPEND KOKKOS_SETTINGS ROCM_HCC_PATH=${KOKKOS_TESTDIR}/config)

    # KOKKOS_SETTINGS is expanded unquoted on purpose: each list element has
    # to become a separate command-line word in front of `make`.
    execute_process(
      COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRCDIR}/core/src/Makefile build-makefile-cmake-kokkos
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      OUTPUT_FILE ${CMAKE_BINARY_DIR}/core_src_make.out
      RESULT_VARIABLE res
    )
    # Stop right here when generation failed; otherwise the failure would only
    # show up later as an unrelated-looking rename error.
    if(NOT "${res}" STREQUAL "0")
      message(FATAL_ERROR
        "Generating Kokkos settings for ${KOKKOS_HOST_ARCH}/${KOKKOS_DEV} failed "
        "(make result: ${res}); see ${CMAKE_BINARY_DIR}/core_src_make.out")
    endif()

    # The generated files live in the directory make was run from; spell the
    # paths out instead of relying on how relative paths are resolved.
    file(REMOVE
      "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_INTERNAL_CONFIG_TMP}"
      "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_MAKEFILE}"
    )

    set(PREFIX "${KOKKOS_HOST_ARCH}_${KOKKOS_DEV}_")
    set(NEWCMAKE ${PREFIX}${KOKKOS_CMAKEFILE})
    set(NEWCONFH ${PREFIX}${KOKKOS_CONFIG_HEADER})
    file(RENAME
      "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_CMAKEFILE}"
      "${CMAKE_CURRENT_SOURCE_DIR}/${NEWCMAKE}"
    )
    file(RENAME
      "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_CONFIG_HEADER}"
      "${CMAKE_CURRENT_SOURCE_DIR}/${NEWCONFH}"
    )

    add_test(NAME ${NEWCMAKE}-test
      COMMAND ${KOKKOS_TESTDIR}/testmake.sh ${NEWCMAKE} ${KOKKOS_HOST_ARCH} ${KOKKOS_DEV}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    )
    add_test(NAME ${NEWCONFH}-test
      COMMAND ${KOKKOS_TESTDIR}/diffconfig.sh ${NEWCONFH}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    )
    # Both checks share the same pass criterion and time limit.
    set_tests_properties(${NEWCMAKE}-test ${NEWCONFH}-test
      PROPERTIES
        PASS_REGULAR_EXPRESSION Passed
        TIMEOUT 15
    )

    # Switch the device off again so the next pairing starts clean.
    set(KOKKOS_ENABLE_${KOKKOS_DEVUC} Off)
  endforeach()
endforeach()

View File

@ -0,0 +1,5 @@
#!/bin/sh
# Print a canned g++ 6.3.1 version banner, one line per argument.
printf '%s\n' \
  "g++ (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)" \
  "Copyright (C) 2016 Free Software Foundation, Inc." \
  "This is free software; see the source for copying conditions. There is NO" \
  "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."

View File

@ -0,0 +1,5 @@
#!/bin/sh
# Print a canned g++ 6.3.1 version banner, one line per argument.
printf '%s\n' \
  "g++ (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)" \
  "Copyright (C) 2016 Free Software Foundation, Inc." \
  "This is free software; see the source for copying conditions. There is NO" \
  "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."

View File

@ -0,0 +1,5 @@
#!/bin/sh
# Print a canned nvcc 8.0 version banner, one line per argument.
printf '%s\n' \
  "nvcc: NVIDIA (R) Cuda compiler driver" \
  "Copyright (c) 2005-2016 NVIDIA Corporation" \
  "Built on Tue_Jan_10_13:22:03_CST_2017" \
  "Cuda compilation tools, release 8.0, V8.0.61"

View File

@ -0,0 +1,18 @@
/* ---------------------------------------------
Makefile constructed configuration:
Fri Sep 22 17:22:09 MDT 2017
----------------------------------------------*/
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
#else
#define KOKKOS_CORE_CONFIG_H
#endif
/* Execution Spaces */
#define KOKKOS_HAVE_CUDA 1
#define KOKKOS_HAVE_SERIAL 1
/* General Settings */
#define KOKKOS_HAVE_CXX11 1
#define KOKKOS_ENABLE_PROFILING
/* Optimization Settings */
/* Cuda Settings */
#define KOKKOS_ARCH_AVX 1

Some files were not shown because too many files have changed in this diff Show More