Update to Kokkos library v2.5.00
This commit is contained in:
@ -1,4 +1,44 @@
|
||||
# Change Log
|
||||
|
||||
## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00)
|
||||
|
||||
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5**
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Provide Makefile.kokkos logic for CMake and TriBITS [\#878](https://github.com/kokkos/kokkos/issues/878)
|
||||
- Add Scatter View [\#825](https://github.com/kokkos/kokkos/issues/825)
|
||||
- Drop gcc 4.7 and intel 14 from supported compiler list [\#603](https://github.com/kokkos/kokkos/issues/603)
|
||||
- Enable construction of unmanaged view using common\_view\_alloc\_prop [\#1170](https://github.com/kokkos/kokkos/issues/1170)
|
||||
- Unused Function Warning with XL [\#1267](https://github.com/kokkos/kokkos/issues/1267)
|
||||
- Add memory pool parameter check [\#1218](https://github.com/kokkos/kokkos/issues/1218)
|
||||
- CUDA9: Fix warning for unsupported long double [\#1189](https://github.com/kokkos/kokkos/issues/1189)
|
||||
- CUDA9: fix warning on defaulted function marking [\#1188](https://github.com/kokkos/kokkos/issues/1188)
|
||||
- CUDA9: fix warnings for deprecated warp level functions [\#1187](https://github.com/kokkos/kokkos/issues/1187)
|
||||
- Add CUDA 9.0 nightly testing [\#1174](https://github.com/kokkos/kokkos/issues/1174)
|
||||
- {OMPI,MPICH}\_CXX hack breaks nvcc\_wrapper use case [\#1166](https://github.com/kokkos/kokkos/issues/1166)
|
||||
- KOKKOS\_HAVE\_CUDA\_LAMBDA became KOKKOS\_CUDA\_USE\_LAMBDA [\#1274](https://github.com/kokkos/kokkos/issues/1274)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- MinMax Reducer with tagged operator doesn't compile [\#1251](https://github.com/kokkos/kokkos/issues/1251)
|
||||
- Reducers for Tagged operators give wrong answer [\#1250](https://github.com/kokkos/kokkos/issues/1250)
|
||||
- Kokkos not Compatible with Big Endian Machines? [\#1235](https://github.com/kokkos/kokkos/issues/1235)
|
||||
- Parallel Scan hangs forever on BG/Q [\#1234](https://github.com/kokkos/kokkos/issues/1234)
|
||||
- Threads backend doesn't compile with Clang on OS X [\#1232](https://github.com/kokkos/kokkos/issues/1232)
|
||||
- $\(shell date\) needs quote [\#1264](https://github.com/kokkos/kokkos/issues/1264)
|
||||
- Unqualified parallel\_for call conflicts with user-defined parallel\_for [\#1219](https://github.com/kokkos/kokkos/issues/1219)
|
||||
- KokkosAlgorithms: CMake issue in unit tests [\#1212](https://github.com/kokkos/kokkos/issues/1212)
|
||||
- Intel 18 Error: "simd pragma has been deprecated" [\#1210](https://github.com/kokkos/kokkos/issues/1210)
|
||||
- Memory leak in Kokkos::initialize [\#1194](https://github.com/kokkos/kokkos/issues/1194)
|
||||
- CUDA9: compiler error with static assert template arguments [\#1190](https://github.com/kokkos/kokkos/issues/1190)
|
||||
- Kokkos::Serial::is\_initialized returns always true [\#1184](https://github.com/kokkos/kokkos/issues/1184)
|
||||
- Triple nested parallelism still fails on bowman [\#1093](https://github.com/kokkos/kokkos/issues/1093)
|
||||
- OpenMP openmp.range on Develop Runs Forever on POWER7+ with RHEL7 and GCC4.8.5 [\#995](https://github.com/kokkos/kokkos/issues/995)
|
||||
- Rendezvous performance at global scope [\#985](https://github.com/kokkos/kokkos/issues/985)
|
||||
|
||||
|
||||
## [2.04.11](https://github.com/kokkos/kokkos/tree/2.04.11) (2017-10-28)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.04...2.04.11)
|
||||
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
# Is this a build as part of Trilinos?
|
||||
|
||||
IF(COMMAND TRIBITS_PACKAGE_DECL)
|
||||
SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "")
|
||||
ELSE()
|
||||
@ -6,13 +8,57 @@ ENDIF()
|
||||
|
||||
IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
|
||||
project(Kokkos CXX)
|
||||
|
||||
INCLUDE(cmake/kokkos.cmake)
|
||||
# Define Project Name if this is a standalone build
|
||||
IF(NOT DEFINED ${PROJECT_NAME})
|
||||
project(Kokkos CXX)
|
||||
ENDIF()
|
||||
|
||||
# Basic initialization (Used in KOKKOS_SETTINGS)
|
||||
set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR})
|
||||
set(KOKKOS_PATH ${KOKKOS_SRC_PATH})
|
||||
|
||||
#------------ COMPILER AND FEATURE CHECKS ------------------------------------
|
||||
include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
|
||||
set_kokkos_cxx_compiler()
|
||||
set_kokkos_cxx_standard()
|
||||
|
||||
#------------ GET OPTIONS AND KOKKOS_SETTINGS --------------------------------
|
||||
# Add Kokkos' modules to CMake's module path.
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/")
|
||||
|
||||
set(KOKKOS_CMAKE_VERBOSE True)
|
||||
include(${KOKKOS_SRC_PATH}/cmake/kokkos_options.cmake)
|
||||
|
||||
include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake)
|
||||
|
||||
#------------ GENERATE HEADER AND SOURCE FILES -------------------------------
|
||||
execute_process(
|
||||
COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings
|
||||
WORKING_DIRECTORY "${Kokkos_BINARY_DIR}"
|
||||
OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out
|
||||
RESULT_VARIABLE res
|
||||
)
|
||||
include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake)
|
||||
set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})
|
||||
|
||||
#------------ NOW BUILD ------------------------------------------------------
|
||||
include(${KOKKOS_SRC_PATH}/cmake/kokkos_build.cmake)
|
||||
|
||||
#------------ Add in Fake Tribits Handling to allow unit test builds ---------
|
||||
|
||||
include(${KOKKOS_SRC_PATH}/cmake/tribits.cmake)
|
||||
|
||||
TRIBITS_PACKAGE_DECL(Kokkos)
|
||||
|
||||
ADD_SUBDIRECTORY(core)
|
||||
ADD_SUBDIRECTORY(containers)
|
||||
ADD_SUBDIRECTORY(algorithms)
|
||||
|
||||
ELSE()
|
||||
#------------------------------------------------------------------------------
|
||||
#
|
||||
# A) Forward delcare the package so that certain options are also defined for
|
||||
# A) Forward declare the package so that certain options are also defined for
|
||||
# subpackages
|
||||
#
|
||||
|
||||
@ -21,212 +67,28 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
#
|
||||
# B) Define the common options for Kokkos first so they can be used by
|
||||
# subpackages as well.
|
||||
# B) Install Kokkos' build files
|
||||
#
|
||||
# If using the Makefile-generated files, then need to set things up.
|
||||
# Here, assume that TriBITS has been run from ProjectCompilerPostConfig.cmake
|
||||
# and already generated KokkosCore_config.h and kokkos_generated_settings.cmake
|
||||
# in the previously defined Kokkos_GEN_DIR
|
||||
# We need to copy them over to the correct place and source the cmake file
|
||||
|
||||
# mfh 01 Aug 2016: See Issue #61:
|
||||
#
|
||||
# https://github.com/kokkos/kokkos/issues/61
|
||||
#
|
||||
# Don't use TRIBITS_ADD_DEBUG_OPTION() here, because that defines
|
||||
# HAVE_KOKKOS_DEBUG. We define KOKKOS_HAVE_DEBUG here instead,
|
||||
# for compatibility with Kokkos' Makefile build system.
|
||||
if(NOT KOKKOS_LEGACY_TRIBITS)
|
||||
set(Kokkos_GEN_DIR ${CMAKE_BINARY_DIR})
|
||||
file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h"
|
||||
DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS)
|
||||
install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h"
|
||||
DESTINATION include)
|
||||
file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake"
|
||||
DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS)
|
||||
|
||||
if (TPL_ENABLE_CUDA)
|
||||
if (DEFINED CUDA_VERSION)
|
||||
# there is a VERSION_GREATER_EQUAL, but only in CMake >= 3.7
|
||||
if (CUDA_VERSION VERSION_EQUAL "7.5")
|
||||
set(KOKKOS_HAVE_CUDA_GEQ_75 TRUE)
|
||||
endif()
|
||||
if (CUDA_VERSION VERSION_GREATER "7.5")
|
||||
set(KOKKOS_HAVE_CUDA_GEQ_75 TRUE)
|
||||
endif()
|
||||
if (CUDA_VERSION VERSION_EQUAL "8.0")
|
||||
set(KOKKOS_HAVE_CUDA_GEQ_80 TRUE)
|
||||
endif()
|
||||
if (CUDA_VERSION VERSION_GREATER "8.0")
|
||||
set(KOKKOS_HAVE_CUDA_GEQ_80 TRUE)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_DEBUG
|
||||
KOKKOS_HAVE_DEBUG
|
||||
"Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build."
|
||||
${${PROJECT_NAME}_ENABLE_DEBUG}
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_SIERRA_BUILD
|
||||
KOKKOS_FOR_SIERRA
|
||||
"Configure Kokkos for building within the Sierra build system."
|
||||
OFF
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Cuda
|
||||
KOKKOS_HAVE_CUDA
|
||||
"Enable CUDA support in Kokkos."
|
||||
"${KOKKOS_HAVE_CUDA_TPL}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Cuda_UVM
|
||||
KOKKOS_USE_CUDA_UVM
|
||||
"Enable CUDA Unified Virtual Memory as the default in Kokkos."
|
||||
OFF
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Cuda_RDC
|
||||
KOKKOS_HAVE_CUDA_RDC
|
||||
"Enable CUDA Relocatable Device Code support in Kokkos."
|
||||
OFF
|
||||
)
|
||||
|
||||
set(Kokkos_ENABLE_Cuda_Lambda_DEFAULT OFF)
|
||||
if (Kokkos_ENABLE_Cuda)
|
||||
if (KOKKOS_HAVE_CUDA_GEQ_75)
|
||||
if (CMAKE_CXX_FLAGS MATCHES "-expt-extended-lambda")
|
||||
set(Kokkos_ENABLE_Cuda_Lambda_DEFAULT ON)
|
||||
message("-- CUDA version is >= 7.5 and CMAKE_CXX_FLAGS contains -expt-extended-lambda,")
|
||||
message("-- Kokkos_ENABLE_Cuda_Lambda defaults to ON")
|
||||
else()
|
||||
message("-- CMAKE_CXX_FLAGS doesn't contain -expt-extended-lambda,")
|
||||
message("-- Kokkos_ENABLE_Cuda_Lambda defaults to OFF")
|
||||
endif()
|
||||
else()
|
||||
message("-- CUDA version is < 7.5, Kokkos_ENABLE_Cuda_Lambda defaults to OFF")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Cuda_Lambda
|
||||
KOKKOS_HAVE_CUDA_LAMBDA
|
||||
"Enable CUDA LAMBDA support in Kokkos."
|
||||
"${Kokkos_ENABLE_Cuda_Lambda_DEFAULT}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Pthread
|
||||
KOKKOS_HAVE_PTHREAD
|
||||
"Enable Pthread support in Kokkos."
|
||||
OFF
|
||||
)
|
||||
|
||||
ASSERT_DEFINED(TPL_ENABLE_Pthread)
|
||||
IF(Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
|
||||
MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
|
||||
ENDIF()
|
||||
IF(NOT TPL_ENABLE_Pthread)
|
||||
ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0)
|
||||
ENDIF()
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_OpenMP
|
||||
KOKKOS_HAVE_OPENMP
|
||||
"Enable OpenMP support in Kokkos."
|
||||
"${${PROJECT_NAME}_ENABLE_OpenMP}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_QTHREAD
|
||||
KOKKOS_HAVE_QTHREADS
|
||||
"Enable Qthreads support in Kokkos."
|
||||
"${TPL_ENABLE_QTHREAD}"
|
||||
)
|
||||
|
||||
# TODO: No longer an option in Kokkos. Needs to be removed.
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_CXX11
|
||||
KOKKOS_HAVE_CXX11
|
||||
"Enable C++11 support in Kokkos."
|
||||
"${${PROJECT_NAME}_ENABLE_CXX11}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_HWLOC
|
||||
KOKKOS_HAVE_HWLOC
|
||||
"Enable HWLOC support in Kokkos."
|
||||
"${TPL_ENABLE_HWLOC}"
|
||||
)
|
||||
|
||||
# TODO: This is currently not used in Kokkos. Should it be removed?
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_MPI
|
||||
KOKKOS_HAVE_MPI
|
||||
"Enable MPI support in Kokkos."
|
||||
"${TPL_ENABLE_MPI}"
|
||||
)
|
||||
|
||||
# Set default value of Kokkos_ENABLE_Debug_Bounds_Check option
|
||||
#
|
||||
# CMake is case sensitive. The Kokkos_ENABLE_Debug_Bounds_Check
|
||||
# option (defined below) is annoyingly not all caps, but we need to
|
||||
# keep it that way for backwards compatibility. If users forget and
|
||||
# try using an all-caps variable, then make it count by using the
|
||||
# all-caps version as the default value of the original, not-all-caps
|
||||
# option. Otherwise, the default value of this option comes from
|
||||
# Kokkos_ENABLE_DEBUG (see Issue #367).
|
||||
|
||||
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_DEBUG)
|
||||
IF(DEFINED Kokkos_ENABLE_DEBUG_BOUNDS_CHECK)
|
||||
IF(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK)
|
||||
SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT ON)
|
||||
ELSE()
|
||||
SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}")
|
||||
ENDIF()
|
||||
ELSE()
|
||||
SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}")
|
||||
ENDIF()
|
||||
ASSERT_DEFINED(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Debug_Bounds_Check
|
||||
KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
|
||||
"Enable Kokkos::View run-time bounds checking."
|
||||
"${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Debug_DualView_Modify_Check
|
||||
KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
|
||||
"Enable abort when Kokkos::DualView modified on host and device without sync."
|
||||
"${Kokkos_ENABLE_DEBUG}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Profiling
|
||||
KOKKOS_ENABLE_PROFILING
|
||||
"Enable KokkosP profiling support for kernel data collections."
|
||||
"${TPL_ENABLE_DLlib}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Profiling_Load_Print
|
||||
KOKKOS_ENABLE_PROFILING_LOAD_PRINT
|
||||
"Print to standard output which profiling library was loaded."
|
||||
OFF
|
||||
)
|
||||
|
||||
# placeholder for future device...
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Winthread
|
||||
KOKKOS_HAVE_WINTHREAD
|
||||
"Enable Winthread support in Kokkos."
|
||||
"${TPL_ENABLE_Winthread}"
|
||||
)
|
||||
|
||||
# TODO: No longer an option in Kokkos. Needs to be removed.
|
||||
# use new/old View
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_USING_DEPRECATED_VIEW
|
||||
KOKKOS_USING_DEPRECATED_VIEW
|
||||
"Choose whether to use the old, deprecated Kokkos::View"
|
||||
OFF
|
||||
)
|
||||
include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake)
|
||||
# Sources come from makefile-generated kokkos_generated_settings.cmake file
|
||||
# Enable using the individual sources if needed
|
||||
set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})
|
||||
endif ()
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
@ -260,10 +122,6 @@ TRIBITS_PACKAGE_DEF()
|
||||
|
||||
TRIBITS_EXCLUDE_AUTOTOOLS_FILES()
|
||||
|
||||
TRIBITS_EXCLUDE_FILES(
|
||||
classic/doc
|
||||
classic/LinAlg/doc/CrsRefactorNotesMay2012
|
||||
)
|
||||
|
||||
TRIBITS_PACKAGE_POSTPROCESS()
|
||||
|
||||
ENDIF()
|
||||
|
||||
@ -28,33 +28,39 @@ KOKKOS_OPTIONS ?= ""
|
||||
# Options: force_uvm,use_ldg,rdc,enable_lambda
|
||||
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
|
||||
|
||||
# Return a 1 if a string contains a substring and 0 if not
|
||||
# Note: the search string must be given without surrounding '"' characters
|
||||
# Example: with KOKKOS_USE_TPLS="hwloc,librt", $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
|
||||
# Will return a 1
|
||||
# kokkos_has_string: substring test usable via $(call ...).
#   $(1) - text to search in
#   $(2) - text to search for
# Expands to 1 when $(2) occurs anywhere inside $(1), and to 0 otherwise.
# The match is a plain substring match (no word boundaries), so a short
# needle also matches longer tokens that contain it.
# Pass $(1) through a variable reference when it may contain commas: a
# literal comma written in the $(call ...) text separates arguments.
kokkos_has_string = $(if $(findstring $(2),$(1)),1,0)
|
||||
|
||||
# Check for general settings.
|
||||
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX11 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++11)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z)
|
||||
|
||||
# Check for external libraries.
|
||||
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
|
||||
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
|
||||
KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
|
||||
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
|
||||
KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt)
|
||||
KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind)
|
||||
|
||||
# Check for advanced settings.
|
||||
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "compiler_warnings" | wc -l))
|
||||
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
|
||||
KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l))
|
||||
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l))
|
||||
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
|
||||
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
|
||||
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
|
||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
|
||||
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
|
||||
KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
|
||||
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
|
||||
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
|
||||
KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg)
|
||||
KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm)
|
||||
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc)
|
||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
|
||||
|
||||
|
||||
# Check for Kokkos Host Execution Spaces one of which must be on.
|
||||
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
|
||||
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
|
||||
KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
|
||||
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
|
||||
KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP)
|
||||
KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread)
|
||||
KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads)
|
||||
KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
|
||||
@ -65,9 +71,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
|
||||
endif
|
||||
|
||||
# Check for other Execution Spaces.
|
||||
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l))
|
||||
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
|
||||
KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda)
|
||||
KOKKOS_INTERNAL_USE_ROCM := $(call kokkos_has_string,$(KOKKOS_DEVICES),ROCm)
|
||||
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
|
||||
@ -77,25 +83,20 @@ endif
|
||||
|
||||
# Check OS.
|
||||
KOKKOS_OS := $(strip $(shell uname -s))
|
||||
KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname -s | grep CYGWIN | wc -l))
|
||||
KOKKOS_INTERNAL_OS_LINUX := $(strip $(shell uname -s | grep Linux | wc -l))
|
||||
KOKKOS_INTERNAL_OS_DARWIN := $(strip $(shell uname -s | grep Darwin | wc -l))
|
||||
KOKKOS_INTERNAL_OS_CYGWIN := $(call kokkos_has_string,$(KOKKOS_OS),CYGWIN)
|
||||
KOKKOS_INTERNAL_OS_LINUX := $(call kokkos_has_string,$(KOKKOS_OS),Linux)
|
||||
KOKKOS_INTERNAL_OS_DARWIN := $(call kokkos_has_string,$(KOKKOS_OS),Darwin)
|
||||
|
||||
# Check compiler.
|
||||
KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l))
|
||||
KOKKOS_CXX_VERSION := $(strip $(shell $(CXX) --version 2>&1))
|
||||
KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Intel Corporation)
|
||||
KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI)
|
||||
KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
ifneq ($(OMPI_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
endif
|
||||
ifneq ($(MPICH_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
endif
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_HCC := $(strip $(shell $(CXX) --version 2>&1 | grep HCC | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
|
||||
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin)
|
||||
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG = 1
|
||||
@ -209,47 +210,48 @@ endif
|
||||
# Check for Kokkos Architecture settings.
|
||||
|
||||
# Intel based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_WSM := $(strip $(shell echo $(KOKKOS_ARCH) | grep WSM | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC)
|
||||
KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
|
||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
|
||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
|
||||
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
|
||||
KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL)
|
||||
|
||||
# NVIDIA based.
|
||||
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler30)
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler32)
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler35)
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler37)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell50)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell52)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell53)
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61)
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
|
||||
|
||||
#SEK: This seems like a bug to me
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell)
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler)
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
|
||||
@ -262,43 +264,43 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
|
||||
endif
|
||||
endif
|
||||
# ARM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc))
|
||||
|
||||
# IBM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9)
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
|
||||
# AMD based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
|
||||
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),Epyc)
|
||||
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri)
|
||||
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo)
|
||||
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
|
||||
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega)
|
||||
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx901)
|
||||
|
||||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||
|
||||
# Decide what ISA level we are able to support.
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
|
||||
|
||||
# Decide whether we can support transactional memory
|
||||
KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||
|
||||
# Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
|
||||
@ -320,94 +322,100 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS)
|
||||
endif
|
||||
|
||||
KOKKOS_LIBS = -lkokkos -ldl
|
||||
KOKKOS_LIBS = -ldl
|
||||
KOKKOS_LDFLAGS = -L$(shell pwd)
|
||||
KOKKOS_SRC =
|
||||
KOKKOS_HEADERS =
|
||||
|
||||
# Generating the KokkosCore_config.h file.
|
||||
|
||||
KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp
|
||||
KOKKOS_CONFIG_HEADER=KokkosCore_config.h
|
||||
# Functions for generating config header file
|
||||
kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP))
|
||||
|
||||
# Do not append the first line: it is written with '>' so that any existing temporary file is overwritten.
|
||||
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
|
||||
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
|
||||
tmp := $(shell date >> KokkosCore_config.tmp)
|
||||
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,"Makefile constructed configuration:")
|
||||
tmp := $(call kokkos_append_header,"$(shell date)")
|
||||
tmp := $(call kokkos_append_header,"----------------------------------------------*/")
|
||||
|
||||
tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp)
|
||||
tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp)
|
||||
tmp := $(shell echo '\#else' >> KokkosCore_config.tmp)
|
||||
tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp)
|
||||
tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp)
|
||||
|
||||
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,'\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)')
|
||||
tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."')
|
||||
tmp := $(call kokkos_append_header,'\#else')
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
|
||||
tmp := $(call kokkos_append_header,'\#endif')
|
||||
|
||||
tmp := $(call kokkos_append_header,"/* Execution Spaces */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CUDA")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_HAVE_OPENMP')
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_PTHREAD")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_QTHREADS")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_SERIAL")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE")
|
||||
tmp := $(call kokkos_append_header,"\#endif")
|
||||
endif
|
||||
|
||||
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,"/* General Settings */")
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX1Z")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
@ -417,26 +425,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
|
||||
KOKKOS_CXXFLAGS += -g
|
||||
KOKKOS_LDFLAGS += -g -ldl
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_DEBUG")
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
KOKKOS_LIBS += -lhwloc
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HWLOC")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
|
||||
KOKKOS_LIBS += -lrt
|
||||
endif
|
||||
|
||||
@ -444,36 +452,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_LIBS += -lmemkind -lnuma
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HBWSPACE")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
|
||||
endif
|
||||
|
||||
tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
|
||||
endif
|
||||
|
||||
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
|
||||
tmp := $(call kokkos_append_header,"/* Cuda Settings */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC")
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_UVM")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE")
|
||||
KOKKOS_CXXFLAGS += --relocatable-device-code=true
|
||||
KOKKOS_LDFLAGS += --relocatable-device-code=true
|
||||
endif
|
||||
@ -481,7 +489,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA")
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
else
|
||||
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
|
||||
@ -489,19 +497,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_CLANG_WORKAROUND")
|
||||
endif
|
||||
endif
|
||||
|
||||
# Add Architecture flags.
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
@ -518,7 +526,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
@ -535,8 +543,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
@ -553,7 +561,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_SSE42 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xSSE4.2
|
||||
@ -575,7 +583,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
@ -597,7 +605,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER7")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
@ -609,7 +617,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
@ -630,7 +638,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
@ -651,7 +659,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
||||
@ -673,7 +681,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
||||
@ -695,7 +703,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xMIC-AVX512
|
||||
@ -716,7 +724,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX512
|
||||
@ -737,7 +745,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC")
|
||||
KOKKOS_CXXFLAGS += -mmic
|
||||
KOKKOS_LDFLAGS += -mmic
|
||||
endif
|
||||
@ -753,48 +761,48 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
|
||||
endif
|
||||
|
||||
@ -811,28 +819,28 @@ endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
# Let's start by adding the architecture defines
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 701")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KAVERI")
|
||||
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 801")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_CARRIZO")
|
||||
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 803")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_FIJI")
|
||||
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 900")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA")
|
||||
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 901")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_GFX901")
|
||||
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
|
||||
endif
|
||||
|
||||
@ -952,6 +960,10 @@ ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
|
||||
KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
|
||||
endif
|
||||
|
||||
# Set KokkosExtraLibs and add -lkokkos to link line
|
||||
KOKKOS_EXTRA_LIBS := ${KOKKOS_LIBS}
|
||||
KOKKOS_LIBS := -lkokkos ${KOKKOS_LIBS}
|
||||
|
||||
# Setting up dependencies.
|
||||
|
||||
KokkosCore_config.h:
|
||||
|
||||
@ -22,8 +22,8 @@ Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokk
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
|
||||
Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
|
||||
Kokkos_Rendezvous.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp
|
||||
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
|
||||
|
||||
@ -41,48 +41,44 @@ hcedwar(at)sandia.gov and crtrott(at)sandia.gov
|
||||
============================================================================
|
||||
|
||||
Primary tested compilers on X86 are:
|
||||
GCC 4.7.2
|
||||
GCC 4.8.4
|
||||
GCC 4.9.2
|
||||
GCC 4.9.3
|
||||
GCC 5.1.0
|
||||
GCC 5.2.0
|
||||
Intel 14.0.4
|
||||
GCC 5.3.0
|
||||
GCC 6.1.0
|
||||
Intel 15.0.2
|
||||
Intel 16.0.1
|
||||
Intel 17.0.098
|
||||
Intel 17.1.132
|
||||
Intel 17.1.043
|
||||
Intel 17.4.196
|
||||
Intel 18.0.128
|
||||
Clang 3.5.2
|
||||
Clang 3.6.1
|
||||
Clang 3.7.1
|
||||
Clang 3.8.1
|
||||
Clang 3.9.0
|
||||
PGI 17.1
|
||||
Clang 4.0.0
|
||||
Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44)
|
||||
PGI 17.10
|
||||
NVCC 7.0 for CUDA (with gcc 4.8.4)
|
||||
NVCC 7.5 for CUDA (with gcc 4.8.4)
|
||||
NVCC 8.0.44 for CUDA (with gcc 5.3.0)
|
||||
|
||||
Primary tested compilers on Power 8 are:
|
||||
GCC 5.4.0 (OpenMP,Serial)
|
||||
IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
|
||||
IBM XL 13.1.5 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
|
||||
NVCC 8.0.44 for CUDA (with gcc 5.4.0)
|
||||
NVCC 9.0.103 for CUDA (with gcc 6.3.0)
|
||||
|
||||
Primary tested compilers on Intel KNL are:
|
||||
GCC 6.2.0
|
||||
Intel 16.2.181 (with gcc 4.7.2)
|
||||
Intel 17.0.098 (with gcc 4.7.2)
|
||||
Intel 17.1.132 (with gcc 4.9.3)
|
||||
Intel 16.4.258 (with gcc 4.7.2)
|
||||
Intel 17.2.174 (with gcc 4.9.3)
|
||||
Intel 18.0.061 (beta) (with gcc 4.9.3)
|
||||
|
||||
Secondary tested compilers are:
|
||||
CUDA 7.0 (with gcc 4.8.4)
|
||||
CUDA 7.5 (with gcc 4.8.4)
|
||||
CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8)
|
||||
CUDA/Clang 8.0 using Clang/Trunk compiler
|
||||
Intel 18.0.128 (with gcc 4.9.3)
|
||||
|
||||
Other compilers working:
|
||||
X86:
|
||||
Cygwin 2.1.0 64bit with gcc 4.9.3
|
||||
|
||||
Limited testing of the following compilers on POWER7+ systems:
|
||||
GCC 4.8.5 (on RHEL7.1 POWER7+)
|
||||
|
||||
Known non-working combinations:
|
||||
Power8:
|
||||
Pthreads backend
|
||||
@ -96,8 +92,8 @@ GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
|
||||
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
|
||||
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
|
||||
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
|
||||
NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
|
||||
|
||||
Secondary compilers are passing without -Werror.
|
||||
Other compilers are tested occasionally, in particular when pushing from develop to
|
||||
master branch, without -Werror and only for a select set of backends.
|
||||
|
||||
|
||||
@ -2,7 +2,9 @@
|
||||
|
||||
TRIBITS_SUBPACKAGE(Algorithms)
|
||||
|
||||
ADD_SUBDIRECTORY(src)
|
||||
IF(KOKKOS_HAS_TRILINOS)
|
||||
ADD_SUBDIRECTORY(src)
|
||||
ENDIF()
|
||||
|
||||
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
|
||||
#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)
|
||||
|
||||
@ -3,6 +3,32 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
IF(KOKKOS_SEPARATE_LIBS)
|
||||
set(TEST_LINK_TARGETS kokkoscore)
|
||||
ELSE()
|
||||
set(TEST_LINK_TARGETS kokkos)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
|
||||
INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
|
||||
|
||||
# mfh 03 Nov 2017: The gtest library used here must have a different
|
||||
# name than that of the gtest library built in KokkosCore. We can't
|
||||
# just refer to the library in KokkosCore's tests, because it's
|
||||
# possible to build only (e.g.,) KokkosAlgorithms tests, without
|
||||
# building KokkosCore tests.
|
||||
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0")
|
||||
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
kokkosalgorithms_gtest
|
||||
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
|
||||
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
|
||||
TESTONLY
|
||||
)
|
||||
|
||||
SET(SOURCES
|
||||
UnitTestMain.cpp
|
||||
TestCuda.cpp
|
||||
@ -34,5 +60,5 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkosalgorithms_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
|
||||
@ -15,7 +15,8 @@ endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
LDFLAGS ?=
|
||||
override LDFLAGS += -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
|
||||
84
lib/kokkos/benchmarks/benchmark_suite/scripts/build_code.bash
Executable file
84
lib/kokkos/benchmarks/benchmark_suite/scripts/build_code.bash
Executable file
@ -0,0 +1,84 @@
|
||||
#!/bin/bash
#
# Builds the benchmark-suite codes (BytesAndFlops, miniMD, miniFE) with the
# Kokkos Makefile build system. Each code is built in its own directory under
# ./build, relative to the directory this script is invoked from.
#
# All options use the --name=value form; run with --help for the list.

# ---- Default Settings -----

# Paths
KOKKOS_PATH=${PWD}/kokkos
KOKKOS_KERNELS_PATH=${PWD}/kokkos-kernels
MINIMD_PATH=${PWD}/miniMD/kokkos
MINIFE_PATH=${PWD}/miniFE/kokkos

# Kokkos Configure Options
KOKKOS_DEVICES=OpenMP
KOKKOS_ARCH=SNB

# Compiler Options
CXX=mpicxx
OPT_FLAG="-O3"

PRINT_HELP=False

# Print the options recognized by the argument loop below.
print_help() {
  echo "Usage: build_code.bash [options]"
  echo "  --kokkos-path=PATH          Kokkos source tree (default: \${PWD}/kokkos)"
  echo "  --kokkos-kernels-path=PATH  KokkosKernels source tree (default: \${PWD}/kokkos-kernels)"
  echo "  --minimd-path=PATH          miniMD Kokkos variant (default: \${PWD}/miniMD/kokkos)"
  echo "  --minife-path=PATH          miniFE Kokkos variant (default: \${PWD}/miniFE/kokkos)"
  echo "  --device-list=LIST          Value passed to make as KOKKOS_DEVICES (default: OpenMP)"
  echo "  --arch=LIST                 Value passed to make as KOKKOS_ARCH (default: SNB)"
  echo "  --opt-flag=FLAGS            Value passed to make as CXXFLAGS (default: -O3)"
  echo "  --compiler=CXX              Value passed to make as CXX (default: mpicxx)"
  echo "  --with-cuda-options=LIST    Accepted; not forwarded to make by this script"
  echo "  --help                      Show this message and exit"
}

# Numeric test: inside [[ ]] the '>' operator compares strings, not integers.
while [[ $# -gt 0 ]]
do
  key="$1"

  case $key in
    --kokkos-path*)
      KOKKOS_PATH="${key#*=}"
      ;;
    --kokkos-kernels-path*)
      KOKKOS_KERNELS_PATH="${key#*=}"
      ;;
    --minimd-path*)
      MINIMD_PATH="${key#*=}"
      ;;
    --minife-path*)
      MINIFE_PATH="${key#*=}"
      ;;
    --device-list*)
      KOKKOS_DEVICES="${key#*=}"
      ;;
    --arch*)
      # Store only the value: it is forwarded to make as KOKKOS_ARCH=<value>,
      # in the same form as the default (SNB) above.
      KOKKOS_ARCH="${key#*=}"
      ;;
    --opt-flag*)
      OPT_FLAG="${key#*=}"
      ;;
    --compiler*)
      CXX="${key#*=}"
      ;;
    --with-cuda-options*)
      # NOTE(review): collected but not consumed anywhere in this script.
      KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
      ;;
    --help*)
      PRINT_HELP=True
      ;;
    *)
      # args, just append
      # NOTE(review): ARGS is collected but not consumed anywhere in this script.
      ARGS="$ARGS $1"
      ;;
  esac

  shift
done

if [[ "${PRINT_HELP}" == "True" ]]; then
  print_help
  exit 0
fi

# Build one benchmark.
#   $1  sub-directory of ./build to build in
#   $2  Makefile to drive the build with
# The build runs in a subshell so a failed cd cannot move the caller out of
# the tree; -p makes re-running the script on an existing build tree work.
build_benchmark() {
  local build_dir="build/$1"
  local makefile="$2"

  mkdir -p "${build_dir}" || return 1
  (
    cd "${build_dir}" || exit 1
    # CXXFLAGS is quoted so a multi-flag value (e.g. "-O3 -g") stays a single
    # make variable assignment instead of leaking flags to make itself.
    make KOKKOS_ARCH="${KOKKOS_ARCH}" KOKKOS_DEVICES="${KOKKOS_DEVICES}" CXX="${CXX}" \
         KOKKOS_PATH="${KOKKOS_PATH}" CXXFLAGS="${OPT_FLAG}" -f "${makefile}" -j 16
  )
}

# Build BytesAndFlops
build_benchmark bytes_and_flops "${KOKKOS_PATH}/benchmarks/bytes_and_flops/Makefile"

# Build miniMD
build_benchmark miniMD "${MINIMD_PATH}/Makefile"

# Build miniFE
build_benchmark miniFE "${MINIFE_PATH}/src/Makefile"
|
||||
|
||||
|
||||
37
lib/kokkos/benchmarks/benchmark_suite/scripts/checkout_repos.bash
Executable file
37
lib/kokkos/benchmarks/benchmark_suite/scripts/checkout_repos.bash
Executable file
@ -0,0 +1,37 @@
|
||||
#!/bin/bash
#
# Clones (when missing) and updates the repositories used by the benchmark
# suite into the current directory: kokkos, kokkos-kernels, miniMD, miniFE.

# Clone a repository if its directory does not exist yet, then update it.
#   $1  clone URL
#   $2  directory name of the working copy
#   $3  optional branch to check out before pulling
# The update runs in a subshell: if the directory cannot be entered (for
# example because the clone failed), no git command runs in the caller's
# directory and the caller's working directory is left unchanged.
update_repo() {
  local url="$1"
  local dir="$2"
  local branch="$3"

  if [ ! -d "${dir}" ]; then
    git clone "${url}" "${dir}" || return 1
  fi

  (
    cd "${dir}" || exit 1
    if [ -n "${branch}" ]; then
      git checkout "${branch}"
    fi
    git pull
  )
}

# Kokkos
update_repo https://github.com/kokkos/kokkos kokkos develop

# KokkosKernels
update_repo https://github.com/kokkos/kokkos-kernels kokkos-kernels

# MiniMD
update_repo https://github.com/mantevo/miniMD miniMD

# MiniFE
update_repo https://github.com/mantevo/miniFE miniFE
|
||||
|
||||
|
||||
|
||||
14
lib/kokkos/benchmarks/benchmark_suite/scripts/run_benchmark.bash
Executable file
14
lib/kokkos/benchmarks/benchmark_suite/scripts/run_benchmark.bash
Executable file
@ -0,0 +1,14 @@
|
||||
#!/bin/bash
#
# Driver for the benchmark suite: checks out the repositories, builds the
# codes and runs the tests by calling the three helper scripts found in
# PATH_TO_SCRIPTS.
#
# Usage: ./run_benchmark.bash PATH_TO_SCRIPTS KOKKOS_DEVICES KOKKOS_ARCH COMPILER
SCRIPT_PATH=$1
KOKKOS_DEVICES=$2
KOKKOS_ARCH=$3
COMPILER=$4
# Numeric test: inside [[ ]] the '<' operator compares strings, so an argument
# count of 10 or more would sort before "4" and be reported as a usage error.
if [[ $# -lt 4 ]]; then
  echo "Usage: ./run_benchmark.bash PATH_TO_SCRIPTS KOKKOS_DEVICES KOKKOS_ARCH COMPILER"
else

  # Quoted so a script path or option value containing spaces stays one word.
  "${SCRIPT_PATH}/checkout_repos.bash"
  "${SCRIPT_PATH}/build_code.bash" --arch="${KOKKOS_ARCH}" --device-list="${KOKKOS_DEVICES}" --compiler="${COMPILER}"
  "${SCRIPT_PATH}/run_tests.bash"

fi
|
||||
44
lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash
Executable file
44
lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash
Executable file
@ -0,0 +1,44 @@
|
||||
#!/bin/bash
#
# Runs the benchmark binaries found under ./build (bytes_and_flops, miniMD,
# miniFE) and prints one line of scaled figures per benchmark plus their mean.
# Each raw figure is divided by a fixed constant before printing.
# NOTE(review): the divisors look like reference values from a baseline
# machine - confirm their origin before changing them.

# Remember where we started so every section returns to a known directory
# even if an earlier cd failed.
ROOT_DIR=${PWD}

# BytesAndFlops
if cd build/bytes_and_flops; then

  # grep -c yields a bare count; compare it numerically ('>' inside [[ ]]
  # would compare strings).
  USE_CUDA=$(grep -c "_CUDA 1" KokkosCore_config.h)

  if [[ ${USE_CUDA:-0} -gt 0 ]]; then
    BAF_EXE=bytes_and_flops.cuda
    TEAM_SIZE=256
  else
    BAF_EXE=bytes_and_flops.host
    TEAM_SIZE=1
  fi

  BAF_PERF_1=$(./${BAF_EXE} 2 100000 1024 1 1 1 1 ${TEAM_SIZE} 6000 | awk '{print $12/174.5}')
  BAF_PERF_2=$(./${BAF_EXE} 2 100000 1024 16 1 8 64 ${TEAM_SIZE} 6000 | awk '{print $14/1142.65}')

  cd "${ROOT_DIR}"
fi
echo "BytesAndFlops: ${BAF_PERF_1} ${BAF_PERF_2}"


# MiniMD
if cd build/miniMD; then
  cp ../../miniMD/kokkos/Cu_u6.eam ./
  # NOTE(review): OMP_NUM_THREADS must be set by the caller; when it is unset
  # the -t option is left without a value.
  MD_PERF_1=$(./miniMD --half_neigh 0 -s 60 --ntypes 1 -t ${OMP_NUM_THREADS} -i ../../miniMD/kokkos/in.eam.miniMD | grep PERF_SUMMARY | awk '{print $10/21163341}')
  MD_PERF_2=$(./miniMD --half_neigh 0 -s 20 --ntypes 1 -t ${OMP_NUM_THREADS} -i ../../miniMD/kokkos/in.eam.miniMD | grep PERF_SUMMARY | awk '{print $10/13393417}')

  cd "${ROOT_DIR}"
fi
echo "MiniMD: ${MD_PERF_1} ${MD_PERF_2}"

# MiniFE
if cd build/miniFE; then
  # -f: no error when there is no .yaml file from a previous run.
  rm -f -- *.yaml
  ./miniFE.x -nx 100 &> /dev/null
  FE_PERF_1=$(grep "CG Mflop" *.yaml | awk '{print $4/14174}')
  rm -f -- *.yaml
  ./miniFE.x -nx 50 &> /dev/null
  FE_PERF_2=$(grep "CG Mflop" *.yaml | awk '{print $4/11897}')

  cd "${ROOT_DIR}"
fi
echo "MiniFE: ${FE_PERF_1} ${FE_PERF_2}"

# Arithmetic mean of the six scaled figures.
PERF_RESULT=$(echo "${BAF_PERF_1} ${BAF_PERF_2} ${MD_PERF_1} ${MD_PERF_2} ${FE_PERF_1} ${FE_PERF_2}" | awk '{print ($1+$2+$3+$4+$5+$6)/6}')
echo "Total Result: " ${PERF_RESULT}
|
||||
@ -1,7 +1,18 @@
|
||||
KOKKOS_PATH = ${HOME}/kokkos
|
||||
SRC = $(wildcard *.cpp)
|
||||
KOKKOS_DEVICES=Cuda
|
||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||
KOKKOS_ARCH = "SNB,Kepler35"
|
||||
|
||||
|
||||
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
|
||||
ifndef KOKKOS_PATH
|
||||
KOKKOS_PATH = $(MAKEFILE_PATH)../..
|
||||
endif
|
||||
|
||||
SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
|
||||
HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
|
||||
|
||||
vpath %.cpp $(sort $(dir $(SRC)))
|
||||
|
||||
default: build
|
||||
echo "Start Build"
|
||||
@ -9,22 +20,19 @@ default: build
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
|
||||
EXE = bytes_and_flops.cuda
|
||||
KOKKOS_DEVICES = "Cuda,OpenMP"
|
||||
KOKKOS_ARCH = "SNB,Kepler35"
|
||||
else
|
||||
CXX = g++
|
||||
EXE = bytes_and_flops.host
|
||||
KOKKOS_DEVICES = "OpenMP"
|
||||
KOKKOS_ARCH = "SNB"
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3 -g
|
||||
CXXFLAGS ?= -O3 -g
|
||||
override CXXFLAGS += -I$(MAKEFILE_PATH)
|
||||
|
||||
DEPFLAGS = -M
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
OBJ = $(SRC:.cpp=.o)
|
||||
OBJ = $(notdir $(SRC:.cpp=.o))
|
||||
LIB =
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
@ -39,5 +47,5 @@ clean: kokkos-clean
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
|
||||
|
||||
@ -69,11 +69,11 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
int team_size, int vector_size, int test_type,
|
||||
ViewType1 &v1, ViewType2 &v2, ViewType3 &v3,
|
||||
double &result, double &result_expect, double &time) {
|
||||
|
||||
|
||||
typedef Kokkos::TeamPolicy<ScheduleType,IndexType> t_policy;
|
||||
typedef typename t_policy::member_type t_team;
|
||||
Kokkos::Timer timer;
|
||||
|
||||
|
||||
for(int orep = 0; orep<outer_repeat; orep++) {
|
||||
|
||||
if (test_type == 100) {
|
||||
@ -95,7 +95,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
v2( idx, t ) = t;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
if (test_type == 111) {
|
||||
@ -178,12 +178,13 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
for (int tr = 0; tr<thread_repeat; ++tr) {
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double &lval) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr<inner_repeat; ++vr)
|
||||
for (int vr = 0; vr<inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,vector_range), [&] (const int vi, double &vval) {
|
||||
vval += 1;
|
||||
}, vector_result);
|
||||
lval += vector_result;
|
||||
}
|
||||
}, team_result);
|
||||
}
|
||||
v1(idx) = team_result;
|
||||
@ -191,7 +192,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
});
|
||||
}
|
||||
if (test_type == 200) {
|
||||
Kokkos::parallel_reduce("200 outer reduce", t_policy(team_range,team_size),
|
||||
Kokkos::parallel_reduce("200 outer reduce", t_policy(team_range,team_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
lval+=team.team_size()*team.league_rank() + team.team_rank();
|
||||
},result);
|
||||
@ -315,7 +316,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
|
||||
// parallel_for RangePolicy: range = team_size*team_range
|
||||
if (test_type == 300) {
|
||||
Kokkos::parallel_for("300 outer for", team_size*team_range,
|
||||
Kokkos::parallel_for("300 outer for", team_size*team_range,
|
||||
KOKKOS_LAMBDA (const int idx) {
|
||||
v1(idx) = idx;
|
||||
// prevent compiler from optimizing away the loop
|
||||
@ -323,7 +324,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
}
|
||||
// parallel_reduce RangePolicy: range = team_size*team_range
|
||||
if (test_type == 400) {
|
||||
Kokkos::parallel_reduce("400 outer reduce", team_size*team_range,
|
||||
Kokkos::parallel_reduce("400 outer reduce", team_size*team_range,
|
||||
KOKKOS_LAMBDA (const int idx, double& val) {
|
||||
val += idx;
|
||||
}, result);
|
||||
@ -331,7 +332,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
}
|
||||
// parallel_scan RangePolicy: range = team_size*team_range
|
||||
if (test_type == 500) {
|
||||
Kokkos::parallel_scan("500 outer scan", team_size*team_range,
|
||||
Kokkos::parallel_scan("500 outer scan", team_size*team_range,
|
||||
ParallelScanFunctor<ViewType1>(v1)
|
||||
#if 0
|
||||
// This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation
|
||||
|
||||
@ -26,6 +26,7 @@ fi
|
||||
# Get parent cpuset
|
||||
HPCBIND_HWLOC_PARENT_CPUSET=""
|
||||
if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
|
||||
HPCBIND_HWLOC_VERSION="$(hwloc-ls --version | cut -d ' ' -f 2)"
|
||||
MY_PID="$BASHPID"
|
||||
HPCBIND_HWLOC_PARENT_CPUSET="$(hwloc-ps -a --cpuset | grep ${MY_PID} | cut -f 2)"
|
||||
fi
|
||||
@ -45,8 +46,11 @@ declare -i NUM_GPUS=0
|
||||
HPCBIND_VISIBLE_GPUS=""
|
||||
if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then
|
||||
NUM_GPUS=$(nvidia-smi -L | wc -l);
|
||||
GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )"
|
||||
HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}}
|
||||
HPCBIND_HAS_NVIDIA=$((!$?))
|
||||
if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then
|
||||
GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )"
|
||||
HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}}
|
||||
fi
|
||||
fi
|
||||
|
||||
declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0))
|
||||
@ -57,33 +61,38 @@ declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0))
|
||||
# supports sbatch, bsub, aprun
|
||||
################################################################################
|
||||
HPCBIND_QUEUE_NAME=""
|
||||
declare -i HPCBIND_QUEUE_INDEX=0
|
||||
declare -i HPCBIND_QUEUE_RANK=0
|
||||
declare -i HPCBIND_QUEUE_SIZE=0
|
||||
declare -i HPCBIND_QUEUE_MAPPING=0
|
||||
|
||||
if [[ ! -z "${PMI_RANK}" ]]; then
|
||||
HPCBIND_QUEUE_MAPPING=1
|
||||
HPCBIND_QUEUE_NAME="mpich"
|
||||
HPCBIND_QUEUE_INDEX=${PMI_RANK}
|
||||
HPCBIND_QUEUE_RANK=${PMI_RANK}
|
||||
HPCBIND_QUEUE_SIZE=${PMI_SIZE}
|
||||
elif [[ ! -z "${OMPI_COMM_WORLD_RANK}" ]]; then
|
||||
HPCBIND_QUEUE_MAPPING=1
|
||||
HPCBIND_QUEUE_NAME="openmpi"
|
||||
HPCBIND_QUEUE_INDEX=${OMPI_COMM_WORLD_RANK}
|
||||
HPCBIND_QUEUE_RANK=${OMPI_COMM_WORLD_RANK}
|
||||
HPCBIND_QUEUE_SIZE=${OMPI_COMM_WORLD_SIZE}
|
||||
elif [[ ! -z "${MV2_COMM_WORLD_RANK}" ]]; then
|
||||
HPCBIND_QUEUE_MAPPING=1
|
||||
HPCBIND_QUEUE_NAME="mvapich2"
|
||||
HPCBIND_QUEUE_INDEX=${MV2_COMM_WORLD_RANK}
|
||||
HPCBIND_QUEUE_RANK=${MV2_COMM_WORLD_RANK}
|
||||
HPCBIND_QUEUE_SIZE=${MV2_COMM_WORLD_SIZE}
|
||||
elif [[ ! -z "${SLURM_LOCAL_ID}" ]]; then
|
||||
HPCBIND_QUEUE_MAPPING=1
|
||||
HPCBIND_QUEUE_NAME="slurm"
|
||||
HPCBIND_QUEUE_INDEX=${SLURM_LOCAL_ID}
|
||||
elif [[ ! -z "${LBS_JOBINDEX}" ]]; then
|
||||
HPCBIND_QUEUE_MAPPING=1
|
||||
HPCBIND_QUEUE_NAME="bsub"
|
||||
HPCBIND_QUEUE_INDEX=${LBS_JOBINDEX}
|
||||
HPCBIND_QUEUE_RANK=${SLURM_PROCID}
|
||||
HPCBIND_QUEUE_SIZE=${SLURM_NPROCS}
|
||||
elif [[ ! -z "${ALPS_APP_PE}" ]]; then
|
||||
HPCBIND_QUEUE_MAPPING=1
|
||||
HPCBIND_QUEUE_NAME="aprun"
|
||||
HPCBIND_QUEUE_INDEX=${ALPS_APP_PE}
|
||||
HPCBIND_QUEUE_RANK=${ALPS_APP_PE}
|
||||
elif [[ ! -z "${LBS_JOBINDEX}" ]]; then
|
||||
HPCBIND_QUEUE_MAPPING=1
|
||||
HPCBIND_QUEUE_NAME="bsub"
|
||||
HPCBIND_QUEUE_RANK=${LBS_JOBINDEX}
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
@ -113,8 +122,8 @@ function show_help {
|
||||
echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES"
|
||||
echo " --openmp=M.m Set env variables for the given OpenMP version"
|
||||
echo " Default: 4.0"
|
||||
echo " --openmp-percent=N Integer percentage of cpuset to use for OpenMP"
|
||||
echo " threads Default: 100"
|
||||
echo " --openmp-ratio=N/D Ratio of the cpuset to use for OpenMP"
|
||||
echo " Default: 1"
|
||||
echo " --openmp-places=<Op> Op=threads|cores|sockets. Default: threads"
|
||||
echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES"
|
||||
echo " --force-openmp-num-threads=N"
|
||||
@ -123,8 +132,8 @@ function show_help {
|
||||
echo " Override logic for selecting OMP_PROC_BIND"
|
||||
echo " --no-openmp-nested Set OMP_NESTED to false"
|
||||
echo " --output-prefix=<P> Save the output to files of the form"
|
||||
echo " P-N.log, P-N.out and P-N.err where P is the prefix"
|
||||
echo " and N is the queue index or mpi rank (no spaces)"
|
||||
echo " P.hpcbind.N, P.stdout.N and P.stderr.N where P is "
|
||||
echo " the prefix and N is the rank (no spaces)"
|
||||
echo " --output-mode=<Op> How console output should be handled."
|
||||
echo " Options are all, rank0, and none. Default: rank0"
|
||||
echo " --lstopo Show bindings in lstopo"
|
||||
@ -132,20 +141,27 @@ function show_help {
|
||||
echo " -h|--help Show this message"
|
||||
echo ""
|
||||
echo "Sample Usage:"
|
||||
echo ""
|
||||
echo " Split the current process cpuset into 4 and use the 3rd partition"
|
||||
echo " ${cmd} --distribute=4 --distribute-partition=2 -v -- command ..."
|
||||
echo ""
|
||||
echo " Launch 16 jobs over 4 nodes with 4 jobs per node using only the even pus"
|
||||
echo " and save the output to rank specific files"
|
||||
echo " mpiexec -N 16 -npernode 4 ${cmd} --whole-system --proc-bind=pu:even \\"
|
||||
echo " --distribute=4 -v --output-prefix=output -- command ..."
|
||||
echo ""
|
||||
echo " Bind the process to all even cores"
|
||||
echo " ${cmd} --proc-bind=core:even -v -- command ..."
|
||||
echo ""
|
||||
echo "  Bind to the even cores of socket 0 and the odd cores of socket 1"
|
||||
echo " ${cmd} --proc-bind='socket:0.core:even socket:1.core:odd' -v -- command ..."
|
||||
echo ""
|
||||
echo " Skip GPU 0 when mapping visible devices"
|
||||
echo " ${cmd} --distribute=4 --distribute-partition=0 --visible-gpus=1,2 -v -- command ..."
|
||||
echo ""
|
||||
echo " Display the current bindings"
|
||||
echo " ${cmd} --proc-bind=numa:0 -- command"
|
||||
echo ""
|
||||
echo " Display the current bindings using lstopo"
|
||||
echo " ${cmd} --proc-bind=numa:0.core:odd --lstopo"
|
||||
echo ""
|
||||
@ -167,12 +183,13 @@ declare -i HPCBIND_DISTRIBUTE=1
|
||||
declare -i HPCBIND_PARTITION=-1
|
||||
HPCBIND_PROC_BIND="all"
|
||||
HPCBIND_OPENMP_VERSION=4.0
|
||||
declare -i HPCBIND_OPENMP_PERCENT=100
|
||||
declare -i HPCBIND_OPENMP_RATIO_NUMERATOR=1
|
||||
declare -i HPCBIND_OPENMP_RATIO_DENOMINATOR=1
|
||||
HPCBIND_OPENMP_PLACES=${OMP_PLACES:-threads}
|
||||
declare -i HPCBIND_OPENMP_PROC_BIND=1
|
||||
declare -i HPCBIND_OPENMP_FORCE_NUM_THREADS=-1
|
||||
HPCBIND_OPENMP_FORCE_NUM_THREADS=""
|
||||
HPCBIND_OPENMP_FORCE_PROC_BIND=""
|
||||
HPCBIND_OPENMP_NESTED=${OMP_NESTED:-true}
|
||||
declare -i HPCBIND_OPENMP_NESTED=1
|
||||
declare -i HPCBIND_VERBOSE=0
|
||||
|
||||
declare -i HPCBIND_LSTOPO=0
|
||||
@ -199,6 +216,9 @@ for i in "$@"; do
|
||||
;;
|
||||
--distribute=*)
|
||||
HPCBIND_DISTRIBUTE="${i#*=}"
|
||||
if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then
|
||||
HPCBIND_DISTRIBUTE=1
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
# which partition to use
|
||||
@ -222,8 +242,18 @@ for i in "$@"; do
|
||||
HPCBIND_OPENMP_VERSION="${i#*=}"
|
||||
shift
|
||||
;;
|
||||
--openmp-percent=*)
|
||||
HPCBIND_OPENMP_PERCENT="${i#*=}"
|
||||
--openmp-ratio=*)
|
||||
IFS=/ read HPCBIND_OPENMP_RATIO_NUMERATOR HPCBIND_OPENMP_RATIO_DENOMINATOR <<< "${i#*=}"
|
||||
if [[ ${HPCBIND_OPENMP_RATIO_NUMERATOR} -le 0 ]]; then
|
||||
HPCBIND_OPENMP_RATIO_NUMERATOR=1
|
||||
fi
|
||||
if [[ ${HPCBIND_OPENMP_RATIO_DENOMINATOR} -le 0 ]]; then
|
||||
HPCBIND_OPENMP_RATIO_DENOMINATOR=1
|
||||
fi
|
||||
if [[ ${HPCBIND_OPENMP_RATIO_NUMERATOR} -gt ${HPCBIND_OPENMP_RATIO_DENOMINATOR} ]]; then
|
||||
HPCBIND_OPENMP_RATIO_NUMERATOR=1
|
||||
HPCBIND_OPENMP_RATIO_DENOMINATOR=1
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
--openmp-places=*)
|
||||
@ -243,7 +273,7 @@ for i in "$@"; do
|
||||
shift
|
||||
;;
|
||||
--no-openmp-nested)
|
||||
HPCBIND_OPENMP_NESTED="false"
|
||||
HPCBIND_OPENMP_NESTED=0
|
||||
shift
|
||||
;;
|
||||
--output-prefix=*)
|
||||
@ -292,7 +322,7 @@ if [[ "${HPCBIND_OUTPUT_MODE}" == "none" ]]; then
|
||||
HPCBIND_TEE=0
|
||||
elif [[ "${HPCBIND_OUTPUT_MODE}" == "all" ]]; then
|
||||
HPCBIND_TEE=1
|
||||
elif [[ ${HPCBIND_QUEUE_INDEX} -eq 0 ]]; then
|
||||
elif [[ ${HPCBIND_QUEUE_RANK} -eq 0 ]]; then
|
||||
#default to rank0 printing to screen
|
||||
HPCBIND_TEE=1
|
||||
fi
|
||||
@ -303,9 +333,18 @@ if [[ "${HPCBIND_OUTPUT_PREFIX}" == "" ]]; then
|
||||
HPCBIND_ERR=/dev/null
|
||||
HPCBIND_OUT=/dev/null
|
||||
else
|
||||
HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_INDEX}"
|
||||
HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_INDEX}"
|
||||
HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_INDEX}"
|
||||
if [[ ${HPCBIND_QUEUE_SIZE} -gt 0 ]]; then
|
||||
HPCBIND_STR_QUEUE_SIZE="${HPCBIND_QUEUE_SIZE}"
|
||||
HPCBIND_STR_QUEUE_RANK=$(printf %0*d ${#HPCBIND_STR_QUEUE_SIZE} ${HPCBIND_QUEUE_RANK})
|
||||
|
||||
HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_STR_QUEUE_RANK}"
|
||||
HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_STR_QUEUE_RANK}"
|
||||
HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_STR_QUEUE_RANK}"
|
||||
else
|
||||
HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_RANK}"
|
||||
HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_RANK}"
|
||||
HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_RANK}"
|
||||
fi
|
||||
> ${HPCBIND_LOG}
|
||||
fi
|
||||
|
||||
@ -333,27 +372,12 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then
|
||||
NUM_GPUS=${#HPCBIND_VISIBLE_GPUS[@]}
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
# Check OpenMP percent
|
||||
################################################################################
|
||||
if [[ ${HPCBIND_OPENMP_PERCENT} -lt 1 ]]; then
|
||||
HPCBIND_OPENMP_PERCENT=1
|
||||
elif [[ ${HPCBIND_OPENMP_PERCENT} -gt 100 ]]; then
|
||||
HPCBIND_OPENMP_PERCENT=100
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
# Check distribute
|
||||
################################################################################
|
||||
if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then
|
||||
HPCBIND_DISTRIBUTE=1
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
#choose the correct partition
|
||||
################################################################################
|
||||
if [[ ${HPCBIND_PARTITION} -lt 0 && ${HPCBIND_QUEUE_MAPPING} -eq 1 ]]; then
|
||||
HPCBIND_PARTITION=${HPCBIND_QUEUE_INDEX}
|
||||
HPCBIND_PARTITION=${HPCBIND_QUEUE_RANK}
|
||||
elif [[ ${HPCBIND_PARTITION} -lt 0 ]]; then
|
||||
HPCBIND_PARTITION=0
|
||||
fi
|
||||
@ -381,23 +405,40 @@ if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
|
||||
else
|
||||
HPCBIND_HWLOC_CPUSET="${BINDING}"
|
||||
fi
|
||||
HPCBIND_NUM_PUS=$(hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu | wc -l)
|
||||
HPCBIND_NUM_PUS=$(hwloc-calc -q -N pu ${HPCBIND_HWLOC_CPUSET} )
|
||||
if [ $? -ne 0 ]; then
|
||||
HPCBIND_NUM_PUS=1
|
||||
fi
|
||||
HPCBIND_NUM_CORES=$(hwloc-calc -q -N core ${HPCBIND_HWLOC_CPUSET} )
|
||||
if [ $? -ne 0 ]; then
|
||||
HPCBIND_NUM_CORES=1
|
||||
fi
|
||||
HPCBIND_NUM_NUMAS=$(hwloc-calc -q -N numa ${HPCBIND_HWLOC_CPUSET} )
|
||||
if [ $? -ne 0 ]; then
|
||||
HPCBIND_NUM_NUMAS=1
|
||||
fi
|
||||
HPCBIND_NUM_SOCKETS=$(hwloc-calc -q -N socket ${HPCBIND_HWLOC_CPUSET} )
|
||||
if [ $? -ne 0 ]; then
|
||||
HPCBIND_NUM_SOCKETS=1
|
||||
fi
|
||||
else
|
||||
HPCBIND_NUM_PUS=$(cat /proc/cpuinfo | grep -c processor)
|
||||
HPCBIND_NUM_CORES=${HPCBIND_NUM_PUS}
|
||||
HPCBIND_NUM_NUMAS=1
|
||||
HPCBIND_NUM_SOCKETS=1
|
||||
fi
|
||||
|
||||
declare -i HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_NUM_PUS * HPCBIND_OPENMP_PERCENT))
|
||||
HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_OPENMP_NUM_THREADS / 100))
|
||||
|
||||
|
||||
if [[ ${HPCBIND_OPENMP_NUM_THREADS} -lt 1 ]]; then
|
||||
HPCBIND_OPENMP_NUM_THREADS=1
|
||||
elif [[ ${HPCBIND_OPENMP_NUM_THREADS} -gt ${HPCBIND_NUM_PUS} ]]; then
|
||||
HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS}
|
||||
fi
|
||||
|
||||
if [[ ${HPCBIND_OPENMP_FORCE_NUM_THREADS} -gt 0 ]]; then
|
||||
if [[ ${HPCBIND_OPENMP_FORCE_NUM_THREADS} != "" ]]; then
|
||||
HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_OPENMP_FORCE_NUM_THREADS}
|
||||
else
|
||||
declare -i HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_NUM_PUS * HPCBIND_OPENMP_RATIO_NUMERATOR / HPCBIND_OPENMP_RATIO_DENOMINATOR))
|
||||
|
||||
if [[ ${HPCBIND_OPENMP_NUM_THREADS} -lt 1 ]]; then
|
||||
HPCBIND_OPENMP_NUM_THREADS=1
|
||||
elif [[ ${HPCBIND_OPENMP_NUM_THREADS} -gt ${HPCBIND_NUM_PUS} ]]; then
|
||||
HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS}
|
||||
fi
|
||||
fi
|
||||
|
||||
################################################################################
|
||||
@ -405,7 +446,11 @@ fi
|
||||
################################################################################
|
||||
|
||||
# set OMP_NUM_THREADS
|
||||
export OMP_NUM_THREADS=${HPCBIND_OPENMP_NUM_THREADS}
|
||||
if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then
|
||||
export OMP_NUM_THREADS="${HPCBIND_OPENMP_NUM_THREADS},1"
|
||||
else
|
||||
export OMP_NUM_THREADS=${HPCBIND_OPENMP_NUM_THREADS}
|
||||
fi
|
||||
|
||||
# set OMP_PROC_BIND and OMP_PLACES
|
||||
if [[ ${HPCBIND_OPENMP_PROC_BIND} -eq 1 ]]; then
|
||||
@ -413,7 +458,11 @@ if [[ ${HPCBIND_OPENMP_PROC_BIND} -eq 1 ]]; then
|
||||
#default proc bind logic
|
||||
if [[ "${HPCBIND_OPENMP_VERSION}" == "4.0" || "${HPCBIND_OPENMP_VERSION}" > "4.0" ]]; then
|
||||
export OMP_PLACES="${HPCBIND_OPENMP_PLACES}"
|
||||
export OMP_PROC_BIND="spread"
|
||||
if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then
|
||||
export OMP_PROC_BIND="spread,spread"
|
||||
else
|
||||
export OMP_PROC_BIND="spread"
|
||||
fi
|
||||
else
|
||||
export OMP_PROC_BIND="true"
|
||||
unset OMP_PLACES
|
||||
@ -429,9 +478,17 @@ else
|
||||
unset OMP_PROC_BIND
|
||||
fi
|
||||
|
||||
# set OMP_NESTED
|
||||
export OMP_NESTED=${HPCBIND_OPENMP_NESTED}
|
||||
# set up hot teams (intel specific)
|
||||
if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then
|
||||
export OMP_NESTED="true"
|
||||
export OMP_MAX_ACTIVE_LEVELS=2
|
||||
export KMP_HOT_TEAMS=1
|
||||
export KMP_HOT_TEAMS_MAX_LEVEL=2
|
||||
else
|
||||
export OMP_NESTED="false"
|
||||
fi
|
||||
|
||||
# set OMP_NESTED
|
||||
|
||||
################################################################################
|
||||
# Set CUDA environment variables
|
||||
@ -442,7 +499,7 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then
|
||||
declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS))
|
||||
export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}"
|
||||
else
|
||||
declare -i MY_TASK_ID=$((HPCBIND_QUEUE_INDEX * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION))
|
||||
declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION))
|
||||
declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS))
|
||||
export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}"
|
||||
fi
|
||||
@ -451,12 +508,17 @@ fi
|
||||
################################################################################
|
||||
# Set hpcbind environment variables
|
||||
################################################################################
|
||||
export HPCBIND_HWLOC_VERSION=${HPCBIND_HWLOC_VERSION}
|
||||
export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC}
|
||||
export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA}
|
||||
export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS}
|
||||
export HPCBIND_NUM_CORES=${HPCBIND_NUM_CORES}
|
||||
export HPCBIND_NUM_NUMAS=${HPCBIND_NUM_NUMAS}
|
||||
export HPCBIND_NUM_SOCKETS=${HPCBIND_NUM_SOCKETS}
|
||||
export HPCBIND_HWLOC_CPUSET="${HPCBIND_HWLOC_CPUSET}"
|
||||
export HPCBIND_HWLOC_DISTRIBUTE=${HPCBIND_DISTRIBUTE}
|
||||
export HPCBIND_HWLOC_DISTRIBUTE_PARTITION=${HPCBIND_PARTITION}
|
||||
export HPCBIND_OPENMP_RATIO="${HPCBIND_OPENMP_RATIO_NUMERATOR}/${HPCBIND_OPENMP_RATIO_DENOMINATOR}"
|
||||
if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then
|
||||
export HPCBIND_HWLOC_PARENT_CPUSET="all"
|
||||
else
|
||||
@ -467,7 +529,8 @@ export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING}
|
||||
export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',')
|
||||
export HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}"
|
||||
if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then
|
||||
export HPCBIND_QUEUE_INDEX=${HPCBIND_QUEUE_INDEX}
|
||||
export HPCBIND_QUEUE_RANK=${HPCBIND_QUEUE_RANK}
|
||||
export HPCBIND_QUEUE_SIZE=${HPCBIND_QUEUE_SIZE}
|
||||
export HPCBIND_QUEUE_NAME="${HPCBIND_QUEUE_NAME}"
|
||||
export HPCBIND_QUEUE_MAPPING=${HPCBIND_QUEUE_MAPPING}
|
||||
fi
|
||||
@ -487,10 +550,16 @@ if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then
|
||||
echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG}
|
||||
echo "[OPENMP]" >> ${HPCBIND_LOG}
|
||||
echo "${TMP_ENV}" | grep -E "^OMP_" >> ${HPCBIND_LOG}
|
||||
echo "[GOMP] (gcc, g++, and gfortran)" >> ${HPCBIND_LOG}
|
||||
echo "${TMP_ENV}" | grep -E "^GOMP_" >> ${HPCBIND_LOG}
|
||||
echo "[KMP] (icc, icpc, and ifort)" >> ${HPCBIND_LOG}
|
||||
echo "${TMP_ENV}" | grep -E "^KMP_" >> ${HPCBIND_LOG}
|
||||
echo "[XLSMPOPTS] (xlc, xlc++, and xlf)" >> ${HPCBIND_LOG}
|
||||
echo "${TMP_ENV}" | grep -E "^XLSMPOPTS" >> ${HPCBIND_LOG}
|
||||
|
||||
if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
|
||||
echo "[BINDINGS]" >> ${HPCBIND_LOG}
|
||||
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu >> ${HPCBIND_LOG}
|
||||
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" >> ${HPCBIND_LOG}
|
||||
else
|
||||
echo "Unable to show bindings, hwloc not available." >> ${HPCBIND_LOG}
|
||||
fi
|
||||
@ -503,10 +572,16 @@ else
|
||||
echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "${TMP_ENV}" | grep -E "^OMP_" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "[GOMP] (gcc, g++, and gfortran)" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "${TMP_ENV}" | grep -E "^GOMP_" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "[KMP] (icc, icpc, and ifort)" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "${TMP_ENV}" | grep -E "^KMP_" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "[XLSMPOPTS] (xlc, xlc++, and xlf)" > >(tee -a ${HPCBIND_LOG})
|
||||
echo "${TMP_ENV}" | grep -E "^XLSMPOPTS" > >(tee -a ${HPCBIND_LOG})
|
||||
|
||||
if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
|
||||
echo "[BINDINGS]" > >(tee -a ${HPCBIND_LOG})
|
||||
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu > >(tee -a ${HPCBIND_LOG})
|
||||
hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --no-io --no-bridges > >(tee -a ${HPCBIND_LOG})
|
||||
else
|
||||
echo "Unable to show bindings, hwloc not available." > >(tee -a ${HPCBIND_LOG})
|
||||
fi
|
||||
|
||||
@ -39,6 +39,12 @@ cuda_args=""
|
||||
# Arguments for both NVCC and Host compiler
|
||||
shared_args=""
|
||||
|
||||
# Argument -c
|
||||
compile_arg=""
|
||||
|
||||
# Argument -o <obj>
|
||||
output_arg=""
|
||||
|
||||
# Linker arguments
|
||||
xlinker_args=""
|
||||
|
||||
@ -66,6 +72,7 @@ dry_run=0
|
||||
|
||||
# Skip NVCC compilation and use host compiler directly
|
||||
host_only=0
|
||||
host_only_args=""
|
||||
|
||||
# Enable workaround for CUDA 6.5 for pragma ident
|
||||
replace_pragma_ident=0
|
||||
@ -81,6 +88,11 @@ optimization_applied=0
|
||||
# Check if we have -std=c++X or --std=c++X already
|
||||
stdcxx_applied=0
|
||||
|
||||
# Run nvcc a second time to generate dependencies if needed
|
||||
depfile_separate=0
|
||||
depfile_output_arg=""
|
||||
depfile_target_arg=""
|
||||
|
||||
#echo "Arguments: $# $@"
|
||||
|
||||
while [ $# -gt 0 ]
|
||||
@ -112,12 +124,31 @@ do
|
||||
fi
|
||||
;;
|
||||
#Handle shared args (valid for both nvcc and the host compiler)
|
||||
-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
|
||||
-D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
|
||||
shared_args="$shared_args $1"
|
||||
;;
|
||||
#Handle shared args that have an argument
|
||||
-o|-MT)
|
||||
shared_args="$shared_args $1 $2"
|
||||
#Handle compilation argument
|
||||
-c)
|
||||
compile_arg="$1"
|
||||
;;
|
||||
#Handle output argument
|
||||
-o)
|
||||
output_arg="$output_arg $1 $2"
|
||||
shift
|
||||
;;
|
||||
# Handle depfile arguments. We map them to a separate call to nvcc.
|
||||
-MD|-MMD)
|
||||
depfile_separate=1
|
||||
host_only_args="$host_only_args $1"
|
||||
;;
|
||||
-MF)
|
||||
depfile_output_arg="-o $2"
|
||||
host_only_args="$host_only_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
-MT)
|
||||
depfile_target_arg="$1 $2"
|
||||
host_only_args="$host_only_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
#Handle known nvcc args
|
||||
@ -242,7 +273,7 @@ if [ $first_xcompiler_arg -eq 0 ]; then
|
||||
fi
|
||||
|
||||
#Compose host only command
|
||||
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
|
||||
host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
|
||||
|
||||
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
|
||||
if [ $replace_pragma_ident -eq 1 ]; then
|
||||
@ -274,10 +305,21 @@ else
|
||||
host_command="$host_command $object_files"
|
||||
fi
|
||||
|
||||
if [ $depfile_separate -eq 1 ]; then
|
||||
# run nvcc a second time to generate dependencies (without compiling)
|
||||
nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg"
|
||||
else
|
||||
nvcc_depfile_command=""
|
||||
fi
|
||||
|
||||
nvcc_command="$nvcc_command $compile_arg $output_arg"
|
||||
|
||||
#Print command for dryrun
|
||||
if [ $dry_run -eq 1 ]; then
|
||||
if [ $host_only -eq 1 ]; then
|
||||
echo $host_command
|
||||
elif [ -n "$nvcc_depfile_command" ]; then
|
||||
echo $nvcc_command "&&" $nvcc_depfile_command
|
||||
else
|
||||
echo $nvcc_command
|
||||
fi
|
||||
@ -287,6 +329,8 @@ fi
|
||||
#Run compilation command
|
||||
if [ $host_only -eq 1 ]; then
|
||||
$host_command
|
||||
elif [ -n "$nvcc_depfile_command" ]; then
|
||||
$nvcc_command && $nvcc_depfile_command
|
||||
else
|
||||
$nvcc_command
|
||||
fi
|
||||
|
||||
8
lib/kokkos/cmake/Makefile.generate_cmake_settings
Normal file
8
lib/kokkos/cmake/Makefile.generate_cmake_settings
Normal file
@ -0,0 +1,8 @@
|
||||
# Aggregates the Kokkos makefile fragments used to generate the CMake settings.
#
# KOKKOS_PATH may be supplied by the caller. When it is not, it is derived from
# the location of this makefile: this file lives in <kokkos>/cmake, so the
# Kokkos root is the parent of this file's directory.
ifndef KOKKOS_PATH
  MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
  # $(dir ...) keeps the trailing slash, so appending ".." yields <kokkos>/cmake/..
  # (Stripping the substring "Makefile" would leave ".generate_cmake_settings"
  # in the path, because this file is not named plain "Makefile".)
  KOKKOS_PATH = $(dir $(MAKEFILE_PATH))..
endif

include $(KOKKOS_PATH)/Makefile.kokkos
include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists
include $(KOKKOS_PATH)/core/src/Makefile.generate_build_files
|
||||
File diff suppressed because it is too large
Load Diff
219
lib/kokkos/cmake/kokkos_build.cmake
Normal file
219
lib/kokkos/cmake/kokkos_build.cmake
Normal file
@ -0,0 +1,219 @@
|
||||
# kokkos_generated_settings.cmake includes the kokkos library itself in KOKKOS_LIBS
|
||||
# which we do not want to use for the cmake builds so clean this up
|
||||
# Quote the input: unquoted, an empty KOKKOS_LIBS leaves the command without an
# <input> argument (a hard error), and a multi-element list is passed as several
# inputs that CMake concatenates with no separator before matching.
string(REGEX REPLACE "-lkokkos" "" KOKKOS_LIBS "${KOKKOS_LIBS}")
|
||||
|
||||
############################ Detect if submodule ###############################
|
||||
#
|
||||
# With thanks to StackOverflow:
|
||||
# http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake
|
||||
#
|
||||
get_directory_property(HAS_PARENT PARENT_DIRECTORY)
|
||||
if(HAS_PARENT)
|
||||
message(STATUS "Submodule build")
|
||||
SET(KOKKOS_HEADER_DIR "include/kokkos")
|
||||
else()
|
||||
message(STATUS "Standalone build")
|
||||
SET(KOKKOS_HEADER_DIR "include")
|
||||
endif()
|
||||
|
||||
################################ Handle the actual build #######################
|
||||
|
||||
SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries")
|
||||
SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables")
|
||||
SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH
|
||||
"Installation directory for header files")
|
||||
IF(WIN32 AND NOT CYGWIN)
|
||||
SET(DEF_INSTALL_CMAKE_DIR CMake)
|
||||
ELSE()
|
||||
SET(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos)
|
||||
ENDIF()
|
||||
|
||||
SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH
|
||||
"Installation directory for CMake files")
|
||||
|
||||
# Make relative paths absolute (needed later on)
|
||||
FOREACH(p LIB BIN INCLUDE CMAKE)
|
||||
SET(var INSTALL_${p}_DIR)
|
||||
IF(NOT IS_ABSOLUTE "${${var}}")
|
||||
SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}")
|
||||
ENDIF()
|
||||
ENDFOREACH()
|
||||
|
||||
# set up include-directories
|
||||
SET (Kokkos_INCLUDE_DIRS
|
||||
${Kokkos_SOURCE_DIR}/core/src
|
||||
${Kokkos_SOURCE_DIR}/containers/src
|
||||
${Kokkos_SOURCE_DIR}/algorithms/src
|
||||
${Kokkos_BINARY_DIR} # to find KokkosCore_config.h
|
||||
${KOKKOS_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
# pass include dirs back to parent scope
|
||||
if(HAS_PARENT)
|
||||
SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS} PARENT_SCOPE)
|
||||
else()
|
||||
SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS})
|
||||
|
||||
IF(KOKKOS_SEPARATE_LIBS)
|
||||
# Sources come from makefile-generated kokkos_generated_settings.cmake file
|
||||
# Separate libs need to separate the sources
|
||||
set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})
|
||||
|
||||
# kokkoscore
|
||||
ADD_LIBRARY(
|
||||
kokkoscore
|
||||
${KOKKOS_CORE_SRCS}
|
||||
)
|
||||
|
||||
target_compile_options(
|
||||
kokkoscore
|
||||
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
|
||||
)
|
||||
|
||||
# Install the kokkoscore library
|
||||
INSTALL (TARGETS kokkoscore
|
||||
EXPORT KokkosTargets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
|
||||
)
|
||||
|
||||
TARGET_LINK_LIBRARIES(
|
||||
kokkoscore
|
||||
${KOKKOS_LD_FLAGS}
|
||||
${KOKKOS_EXTRA_LIBS_LIST}
|
||||
)
|
||||
|
||||
# kokkoscontainers
|
||||
# Names of the library targets created in this branch; consumed further down
# by export(TARGETS ...). kokkoscore is always built.
set(Kokkos_LIBRARIES_NAMES kokkoscore)

# kokkoscontainers is only built when there are container sources. Everything
# that refers to the target (linking, installation, the exported name list)
# must live inside the same guard; referring to a target that was never
# created is a configure-time error.
if(DEFINED KOKKOS_CONTAINERS_SRCS)
  add_library(
    kokkoscontainers
    ${KOKKOS_CONTAINERS_SRCS}
  )

  target_link_libraries(
    kokkoscontainers
    kokkoscore
  )

  # Install the kokkoscontainers library
  install(TARGETS kokkoscontainers
          EXPORT KokkosTargets
          ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
          LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
          RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)

  list(APPEND Kokkos_LIBRARIES_NAMES kokkoscontainers)
endif()

# kokkosalgorithms - Build as interface library since no source files.
add_library(
  kokkosalgorithms
  INTERFACE
)

target_include_directories(
  kokkosalgorithms
  INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src
)

target_link_libraries(
  kokkosalgorithms
  INTERFACE kokkoscore
)

# Install the kokkosalgorithms library
install(TARGETS kokkosalgorithms
        ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
        LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
        RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)

list(APPEND Kokkos_LIBRARIES_NAMES kokkosalgorithms)
|
||||
|
||||
ELSE()
|
||||
# kokkos
|
||||
ADD_LIBRARY(
|
||||
kokkos
|
||||
${KOKKOS_CORE_SRCS}
|
||||
${KOKKOS_CONTAINERS_SRCS}
|
||||
)
|
||||
|
||||
target_compile_options(
|
||||
kokkos
|
||||
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
|
||||
)
|
||||
|
||||
TARGET_LINK_LIBRARIES(
|
||||
kokkos
|
||||
${KOKKOS_LD_FLAGS}
|
||||
${KOKKOS_EXTRA_LIBS_LIST}
|
||||
)
|
||||
|
||||
# Install the kokkos library
|
||||
INSTALL (TARGETS kokkos
|
||||
EXPORT KokkosTargets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
|
||||
|
||||
|
||||
SET (Kokkos_LIBRARIES_NAMES kokkos)
|
||||
|
||||
endif() # KOKKOS_SEPARATE_LIBS
|
||||
|
||||
# Install the kokkos headers.
#
# Each source directory is given with a trailing slash so that its contents
# (not the directory itself) are copied into KOKKOS_HEADER_DIR; only *.hpp
# files are installed.
#
# Note: install(DIRECTORY) has no EXPORT option. Arguments placed before the
# first keyword are taken as directory names, so "EXPORT KokkosTargets" must
# not appear here (export sets belong to install(TARGETS ...)).
install(DIRECTORY
        ${Kokkos_SOURCE_DIR}/core/src/
        ${Kokkos_SOURCE_DIR}/containers/src/
        ${Kokkos_SOURCE_DIR}/algorithms/src/
        DESTINATION ${KOKKOS_HEADER_DIR}
        FILES_MATCHING PATTERN "*.hpp"
)
|
||||
|
||||
INSTALL (FILES
|
||||
${Kokkos_BINARY_DIR}/KokkosCore_config.h
|
||||
DESTINATION ${KOKKOS_HEADER_DIR}
|
||||
)
|
||||
|
||||
# Add all targets to the build-tree export set
|
||||
export(TARGETS ${Kokkos_LIBRARIES_NAMES}
|
||||
FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake")
|
||||
|
||||
# Export the package for use from the build-tree
|
||||
# (this registers the build-tree with a global CMake-registry)
|
||||
export(PACKAGE Kokkos)
|
||||
|
||||
# Create the KokkosConfig.cmake and KokkosConfigVersion files
|
||||
file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}"
|
||||
"${INSTALL_INCLUDE_DIR}")
|
||||
# ... for the build tree
|
||||
set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}")
|
||||
configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in
|
||||
"${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @ONLY)
|
||||
# ... for the install tree
|
||||
set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}")
|
||||
configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in
|
||||
"${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" @ONLY)
|
||||
|
||||
# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake
|
||||
install(FILES
|
||||
"${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake"
|
||||
DESTINATION "${INSTALL_CMAKE_DIR}")
|
||||
|
||||
#This seems not to do anything?
|
||||
#message(STATUS "KokkosTargets: " ${KokkosTargets})
|
||||
# Install the export set for use with the install-tree
|
||||
INSTALL(EXPORT KokkosTargets DESTINATION
|
||||
"${INSTALL_CMAKE_DIR}")
|
||||
345
lib/kokkos/cmake/kokkos_functions.cmake
Normal file
345
lib/kokkos/cmake/kokkos_functions.cmake
Normal file
@ -0,0 +1,345 @@
|
||||
################################### FUNCTIONS ##################################
|
||||
# List of functions
|
||||
# set_kokkos_cxx_compiler
|
||||
# set_kokkos_cxx_standard
|
||||
# set_kokkos_srcs
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# function(set_kokkos_cxx_compiler)
|
||||
# Sets the following compiler variables that are analogous to the CMAKE_*
|
||||
# versions. We add the ability to detect NVCC (really nvcc_wrapper).
|
||||
# KOKKOS_CXX_COMPILER
|
||||
# KOKKOS_CXX_COMPILER_ID
|
||||
# KOKKOS_CXX_COMPILER_VERSION
|
||||
#
|
||||
# Inputs:
|
||||
# KOKKOS_ENABLE_CUDA
|
||||
# CMAKE_CXX_COMPILER
|
||||
# CMAKE_CXX_COMPILER_ID
|
||||
# CMAKE_CXX_COMPILER_VERSION
|
||||
#
|
||||
# Also verifies the compiler version meets the minimum required by Kokkos.
|
||||
function(set_kokkos_cxx_compiler)
  # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own
  # version of the CMake variables and detect nvcc ourselves. Initially set to
  # the CMake variable values.
  set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER})
  set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
  set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION})

  # Check if the compiler is nvcc (which really means nvcc_wrapper): count the
  # lines of `<compiler> --version` that mention "nvcc".
  execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
                  COMMAND grep nvcc
                  COMMAND wc -l
                  OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC
                  OUTPUT_STRIP_TRAILING_WHITESPACE)

  # Strip leading blanks from the count. The input is quoted so that an empty
  # result from the pipeline does not leave string() without an input argument.
  string(REGEX REPLACE "^ +" ""
         INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")

  if(INTERNAL_HAVE_COMPILER_NVCC)
    # Set the compiler id to nvcc. We use the value used by CMake 3.8.
    set(INTERNAL_CXX_COMPILER_ID NVIDIA)

    # Set nvcc's compiler version from the "release" line of --version.
    execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
                    COMMAND grep release
                    OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)

    # "\\." is required: inside a quoted CMake argument "\." is reduced to a
    # bare "." before the regex engine sees it, which would match any character
    # instead of a literal dot.
    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
           INTERNAL_CXX_COMPILER_VERSION "${INTERNAL_CXX_COMPILER_VERSION}")
  endif()

  # Enforce the minimum compilers supported by Kokkos.
  set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n")

  if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.8.4)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  endif()

  # Enforce that extensions are turned off for nvcc_wrapper.
  if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
    if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
      message(FATAL_ERROR "NVCC doesn't support C++ extensions. Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.")
    endif()
  endif()

  if(KOKKOS_ENABLE_CUDA)
    # Enforce that the compiler can compile CUDA code.
    if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
      if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0)
        message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.")
      endif()
    elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
      message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.")
    endif()
  endif()

  # Publish the results to the caller.
  set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE)
  set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE)
  set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE)
endfunction()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# function(set_kokkos_cxx_standard)
|
||||
# Transitively enforces that the appropriate CXX standard compile flags (C++11
|
||||
# or above) are added to targets that use the Kokkos library. Compile features
|
||||
# are used if possible. Otherwise, the appropriate flags are added to
|
||||
# KOKKOS_CXX_FLAGS. Values set by the user to CMAKE_CXX_STANDARD and
|
||||
# CMAKE_CXX_EXTENSIONS are honored.
|
||||
#
|
||||
# Outputs:
|
||||
# KOKKOS_CXX11_FEATURES
|
||||
# KOKKOS_CXX_FLAGS
|
||||
#
|
||||
# Inputs:
|
||||
# KOKKOS_CXX_COMPILER
|
||||
# KOKKOS_CXX_COMPILER_ID
|
||||
# KOKKOS_CXX_COMPILER_VERSION
|
||||
#
|
||||
function(set_kokkos_cxx_standard)
|
||||
# The following table lists the versions of CMake that supports CXX_STANDARD
|
||||
# and the CXX compile features for different compilers. The versions are
|
||||
# based on CMake documentation, looking at CMake code, and verifying by
|
||||
# testing with specific CMake versions.
|
||||
#
|
||||
# COMPILER CXX_STANDARD Compile Features
|
||||
# ---------------------------------------------------------------
|
||||
# Clang 3.1 3.1
|
||||
# GNU 3.1 3.2
|
||||
# AppleClang 3.2 3.2
|
||||
# Intel 3.6 3.6
|
||||
# Cray No No
|
||||
# PGI No No
|
||||
# XL No No
|
||||
#
|
||||
# For compiling CUDA code using nvcc_wrapper, we will use the host compiler's
|
||||
# flags for turning on C++11. Since for compiler ID and versioning purposes
|
||||
# CMake recognizes the host compiler when calling nvcc_wrapper, this just
|
||||
# works. Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means
|
||||
# that we can only use host compilers for CUDA builds that use those flags.
|
||||
# It also means that extensions (gnu++11) can't be turned on for CUDA builds.
|
||||
|
||||
# Check if we can use compile features.
|
||||
if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL Clang)
|
||||
if(NOT CMAKE_VERSION VERSION_LESS 3.1)
|
||||
set(INTERNAL_USE_COMPILE_FEATURES ON)
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU)
|
||||
if(NOT CMAKE_VERSION VERSION_LESS 3.2)
|
||||
set(INTERNAL_USE_COMPILE_FEATURES ON)
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
|
||||
if(NOT CMAKE_VERSION VERSION_LESS 3.6)
|
||||
set(INTERNAL_USE_COMPILE_FEATURES ON)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(INTERNAL_USE_COMPILE_FEATURES)
|
||||
# Use the compile features aspect of CMake to transitively cause C++ flags
|
||||
# to populate to user code.
|
||||
|
||||
# I'm using a hack by requiring features that I know force the lowest version
|
||||
# of the compilers we want to support. Clang 3.3 and later support all of
|
||||
# the C++11 standard. With CMake 3.8 and higher, we could switch to using
|
||||
# cxx_std_11.
|
||||
set(KOKKOS_CXX11_FEATURES
|
||||
cxx_nonstatic_member_init # Forces GCC 4.7 or later and Intel 14.0 or later.
|
||||
PARENT_SCOPE
|
||||
)
|
||||
else()
|
||||
# CXX compile features are not yet implemented for this combination of
|
||||
# compiler and version of CMake.
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
|
||||
# Versions of CMAKE before 3.2 don't support CXX_STANDARD or C++ compile
|
||||
# features for the AppleClang compiler. Set compiler flags transitively
|
||||
# here such that they trickle down to a call to target_compile_options().
|
||||
|
||||
# The following two blocks of code were copied from
|
||||
# /Modules/Compiler/AppleClang-CXX.cmake from CMake 3.7.2 and then
|
||||
# modified.
|
||||
if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
|
||||
set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
|
||||
set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11")
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1)
|
||||
set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14")
|
||||
set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14")
|
||||
elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
|
||||
# AppleClang 5.0 knows this flag, but does not set a __cplusplus macro
|
||||
# greater than 201103L.
|
||||
set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y")
|
||||
set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y")
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
|
||||
# Versions of CMAKE before 3.6 don't support CXX_STANDARD or C++ compile
|
||||
# features for the Intel compiler. Set compiler flags transitively here
|
||||
# such that they trickle down to a call to target_compile_options().
|
||||
|
||||
# The following three blocks of code were copied from
|
||||
# /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified.
|
||||
if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
set(_std -Qstd)
|
||||
set(_ext c++)
|
||||
else()
|
||||
set(_std -std)
|
||||
set(_ext gnu++)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
|
||||
set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++14")
|
||||
# TODO: There is no gnu++14 value supported; figure out what to do.
|
||||
set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++14")
|
||||
elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
|
||||
set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++1y")
|
||||
# TODO: There is no gnu++14 value supported; figure out what to do.
|
||||
set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++1y")
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0)
|
||||
set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++11")
|
||||
set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}11")
|
||||
elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1)
|
||||
set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++0x")
|
||||
set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}0x")
|
||||
endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray)
|
||||
# CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray
|
||||
# compiler. Set compiler options transitively here such that they trickle
|
||||
# down to a call to target_compile_options().
|
||||
set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11")
|
||||
set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11")
|
||||
set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11")
|
||||
set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11")
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI)
|
||||
# CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI
|
||||
# compiler. Set compiler options transitively here such that they trickle
|
||||
# down to a call to target_compile_options().
|
||||
set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11")
|
||||
set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11")
|
||||
set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11")
|
||||
set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11")
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL)
|
||||
# CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL
|
||||
# compiler. Set compiler options transitively here such that they trickle
|
||||
# down to a call to target_compile_options().
|
||||
set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
|
||||
set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11")
|
||||
set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11")
|
||||
set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11")
|
||||
else()
|
||||
# Assume GNU. CMAKE_CXX_STANDARD is handled correctly by CMake 3.1 and
|
||||
# above for this compiler. If the user explicitly requests a C++
|
||||
# standard, CMake takes care of it. If not, transitively require C++11.
|
||||
if(NOT CMAKE_CXX_STANDARD)
|
||||
set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION})
|
||||
set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Set the C++ standard info for Kokkos respecting user set values for
|
||||
# CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS.
|
||||
# Only use cxx extension if explicitly requested
|
||||
if(CMAKE_CXX_STANDARD EQUAL 14)
|
||||
if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
|
||||
set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_EXTENSION_COMPILE_OPTION})
|
||||
else()
|
||||
set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_STANDARD_COMPILE_OPTION})
|
||||
endif()
|
||||
elseif(CMAKE_CXX_STANDARD EQUAL 11)
|
||||
if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
|
||||
set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION})
|
||||
else()
|
||||
set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION})
|
||||
endif()
|
||||
else()
|
||||
# The user didn't explicitly request a standard, transitively require
|
||||
# C++11 respecting CMAKE_CXX_EXTENSIONS.
|
||||
if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
|
||||
set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION})
|
||||
else()
|
||||
set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(KOKKOS_CXX_FLAGS ${INTERNAL_CXX_FLAGS} PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# function(set_kokkos_srcs)
|
||||
# Takes a list of sources for kokkos (e.g., KOKKOS_SRC from Makefile.kokkos and
|
||||
# put it into kokkos_generated_settings.cmake) and sorts the files into the subpackages or
|
||||
# separate_libraries. This is core and containers (algorithms is pure header
|
||||
# files).
|
||||
#
|
||||
# Inputs:
|
||||
# KOKKOS_SRC
|
||||
#
|
||||
# Outputs:
|
||||
# KOKKOS_CORE_SRCS
|
||||
# KOKKOS_CONTAINERS_SRCS
|
||||
#
|
||||
function(set_kokkos_srcs)
  # Usage: set_kokkos_srcs(KOKKOS_SRC <file>...)
  #
  # Splits the given source files into two lists, returned to the caller as
  # KOKKOS_CORE_SRCS and KOKKOS_CONTAINERS_SRCS. A file belongs to "core" when
  # the first component of its path relative to CMAKE_CURRENT_SOURCE_DIR is
  # "core"; every other file goes to the containers list.
  set(opts )                   # no-value args
  set(oneValArgs )
  set(multValArgs KOKKOS_SRC)  # e.g., lists
  cmake_parse_arguments(IN "${opts}" "${oneValArgs}" "${multValArgs}" ${ARGN})

  # Accumulate into function-local lists that start empty. Appending directly
  # to KOKKOS_CORE_SRCS / KOKKOS_CONTAINERS_SRCS would start from whatever the
  # caller already holds in those variables and duplicate every file when the
  # function is called more than once.
  set(core_srcs)
  set(containers_srcs)

  foreach(sfile ${IN_KOKKOS_SRC})
    string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" stripfile "${sfile}")
    string(REPLACE "/" ";" striplist "${stripfile}")
    list(GET striplist 0 firstdir)
    # Quoted on purpose: an empty first component (e.g. an absolute path
    # outside the current source directory) must not collapse the condition
    # into the malformed "if( STREQUAL core)".
    if("${firstdir}" STREQUAL "core")
      list(APPEND core_srcs ${sfile})
    else()
      list(APPEND containers_srcs ${sfile})
    endif()
  endforeach()

  # An empty list leaves the corresponding variable unset in the caller.
  set(KOKKOS_CORE_SRCS ${core_srcs} PARENT_SCOPE)
  set(KOKKOS_CONTAINERS_SRCS ${containers_srcs} PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Setting a default value if it is not already set
|
||||
macro(set_kokkos_default_default VARIABLE DEFAULT)
  # Seeds KOKKOS_INTERNAL_ENABLE_<VARIABLE>_DEFAULT unless it already holds a
  # non-empty value: a non-empty KOKKOS_ENABLE_<VARIABLE> wins, otherwise
  # DEFAULT is used. The cache entry KOKKOS_ENABLE_<VARIABLE> is removed
  # afterwards in every case.
  if("${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}" STREQUAL "")
    if(NOT "${KOKKOS_ENABLE_${VARIABLE}}" STREQUAL "")
      set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${KOKKOS_ENABLE_${VARIABLE}})
    else()
      set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${DEFAULT})
    endif()
  endif()
  unset(KOKKOS_ENABLE_${VARIABLE} CACHE)
endmacro()
|
||||
365
lib/kokkos/cmake/kokkos_options.cmake
Normal file
365
lib/kokkos/cmake/kokkos_options.cmake
Normal file
@ -0,0 +1,365 @@
|
||||
########################## NOTES ###############################################
|
||||
# List the options for configuring kokkos using CMake method of doing it.
|
||||
# These options then get mapped onto KOKKOS_SETTINGS environment variable by
|
||||
# kokkos_settings.cmake. It is separate to allow other packages to override
|
||||
# these variables (e.g., TriBITS).
|
||||
|
||||
########################## AVAILABLE OPTIONS ###################################
|
||||
# Use lists for documentation, verification, and programming convenience
|
||||
|
||||
# All CMake options of the type KOKKOS_ENABLE_*
|
||||
# CamelCase spellings of the options. Each entry <opt> is upper-cased by the
# loops that consume this list to form the KOKKOS_ENABLE_<OPT> variable name,
# so an entry must upper-case to the exact name of the corresponding option.
set(KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST)
list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
     Serial
     OpenMP
     Pthread
     Qthreads   # upper-cases to QTHREADS, matching KOKKOS_ENABLE_QTHREADS
     Cuda
     ROCm
     HWLOC
     MEMKIND
     LIBRT
     Cuda_Lambda
     Cuda_Relocatable_Device_Code
     Cuda_UVM
     Cuda_LDG_Intrinsic
     Debug
     Debug_DualView_Modify_Check
     Debug_Bounds_Check
     Compiler_Warnings
     Profiling
     Profiling_Load_Print
     Aggressive_Vectorization
     )
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- Recognize CamelCase Options ---------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Reconcile each CamelCase option Kokkos_ENABLE_<opt> with its upper-case
# counterpart KOKKOS_ENABLE_<OPT>.
foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
  string(TOUPPER ${opt} OPT )
  if(DEFINED Kokkos_ENABLE_${opt} AND NOT DEFINED KOKKOS_ENABLE_${OPT})
    # Only the CamelCase spelling was given: use it as the default.
    set(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}})
  elseif(DEFINED Kokkos_ENABLE_${opt}
         AND NOT "${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}")
    # Both spellings are defined and they disagree.
    if(DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL)
      message(WARNING "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ! Could be caused by old CMakeCache Variable. Run CMake again and warning should disappear. If not you are truly setting both variables.")
      if("${Kokkos_ENABLE_${opt}}" STREQUAL "${KOKKOS_ENABLE_${OPT}_INTERNAL}")
        # The CamelCase value equals the recorded *_INTERNAL value, so the
        # upper-case variable is copied over it.
        set(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}})
      else()
        # The CamelCase value differs from the recorded *_INTERNAL value, so
        # it replaces the upper-case variable.
        unset(KOKKOS_ENABLE_${OPT} CACHE)
        set(KOKKOS_ENABLE_${OPT} ${Kokkos_ENABLE_${opt}})
        message(WARNING "SET BOTH VARIABLES KOKKOS_ENABLE_${OPT}: ${KOKKOS_ENABLE_${OPT}}")
      endif()
    else()
      message(FATAL_ERROR "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ!")
    endif()
  endif()
endforeach()
|
||||
|
||||
# Accept the CamelCase spelling Kokkos_Arch as an alias for KOKKOS_ARCH.
if(DEFINED Kokkos_Arch)
  if(NOT DEFINED KOKKOS_ARCH)
    set(KOKKOS_ARCH ${Kokkos_Arch})
  # Both sides are quoted: KOKKOS_ARCH may be empty or hold several
  # architectures as a list, and an unquoted expansion would then hand if()
  # the wrong number of arguments.
  elseif(NOT "${KOKKOS_ARCH}" STREQUAL "${Kokkos_Arch}")
    message(FATAL_ERROR "Defined both Kokkos_Arch and KOKKOS_ARCH and they differ!")
  endif()
endif()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# List of possible host (CPU) and device (GPU) architectures.
|
||||
#-------------------------------------------------------------------------------
|
||||
# Accepted values for KOKKOS_ARCH.
set(KOKKOS_ARCH_LIST
    None            # No architecture optimization
    AMDAVX          # (HOST) AMD chip
    ARMv80          # (HOST) ARMv8.0 Compatible CPU
    ARMv81          # (HOST) ARMv8.1 Compatible CPU
    ARMv8-ThunderX  # (HOST) ARMv8 Cavium ThunderX CPU
    WSM             # (HOST) Intel Westmere CPU
    SNB             # (HOST) Intel Sandy/Ivy Bridge CPUs
    HSW             # (HOST) Intel Haswell CPUs
    BDW             # (HOST) Intel Broadwell Xeon E-class CPUs
    SKX             # (HOST) Intel Sky Lake Xeon E-class HPC CPUs (AVX512)
    KNC             # (HOST) Intel Knights Corner Xeon Phi
    KNL             # (HOST) Intel Knights Landing Xeon Phi
    BGQ             # (HOST) IBM Blue Gene Q
    Power7          # (HOST) IBM POWER7 CPUs
    Power8          # (HOST) IBM POWER8 CPUs
    Power9          # (HOST) IBM POWER9 CPUs
    Kepler          # (GPU) NVIDIA Kepler default (generation CC 3.5)
    Kepler30        # (GPU) NVIDIA Kepler generation CC 3.0
    Kepler32        # (GPU) NVIDIA Kepler generation CC 3.2
    Kepler35        # (GPU) NVIDIA Kepler generation CC 3.5
    Kepler37        # (GPU) NVIDIA Kepler generation CC 3.7
    Maxwell         # (GPU) NVIDIA Maxwell default (generation CC 5.0)
    Maxwell50       # (GPU) NVIDIA Maxwell generation CC 5.0
    Maxwell52       # (GPU) NVIDIA Maxwell generation CC 5.2
    Maxwell53       # (GPU) NVIDIA Maxwell generation CC 5.3
    Pascal60        # (GPU) NVIDIA Pascal generation CC 6.0
    Pascal61        # (GPU) NVIDIA Pascal generation CC 6.1
    )
|
||||
|
||||
# List of possible device architectures.
|
||||
# The case and spelling here needs to match Makefile.kokkos
|
||||
# Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial
set(KOKKOS_DEVICES_LIST
    Cuda      # NVIDIA GPU -- see below
    OpenMP    # OpenMP
    Pthread   # pthread
    Qthreads  # qthreads
    Serial    # serial
    ROCm      # ROCm
    )
|
||||
|
||||
# List of possible TPLs for Kokkos
|
||||
# From Makefile.kokkos: Options: hwloc,librt,experimental_memkind
|
||||
# CMake-side TPL names; the trailing comment gives the Makefile spelling.
set(KOKKOS_USE_TPLS_LIST
    HWLOC    # hwloc
    LIBRT    # librt
    MEMKIND  # experimental_memkind
    )
# Map of cmake variables to Makefile variables
set(KOKKOS_INTERNAL_HWLOC   hwloc)
set(KOKKOS_INTERNAL_LIBRT   librt)
set(KOKKOS_INTERNAL_MEMKIND experimental_memkind)
|
||||
|
||||
# List of possible Advanced options
|
||||
set(KOKKOS_OPTIONS_LIST)
list(APPEND KOKKOS_OPTIONS_LIST
     AGGRESSIVE_VECTORIZATION
     DISABLE_PROFILING
     DISABLE_DUALVIEW_MODIFY_CHECK
     ENABLE_PROFILE_LOAD_PRINT
     )
# Map of cmake variables to Makefile variables
set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg)
# UVM corresponds to the Makefile option force_uvm (librt is the name of the
# rt TPL, not of a CUDA option).
set(KOKKOS_INTERNAL_UVM force_uvm)
set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc)
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# List of possible Options for CUDA
|
||||
#-------------------------------------------------------------------------------
|
||||
# From Makefile.kokkos: Options: use_ldg,force_uvm,rdc
|
||||
# CMake-side CUDA option names; the trailing comment gives the Makefile spelling.
set(KOKKOS_CUDA_OPTIONS_LIST
    LDG_INTRINSIC            # use_ldg
    UVM                      # force_uvm
    RELOCATABLE_DEVICE_CODE  # rdc
    LAMBDA                   # enable_lambda
    )

# Map of cmake variables to Makefile variables
set(KOKKOS_INTERNAL_LDG_INTRINSIC           use_ldg)
set(KOKKOS_INTERNAL_UVM                     force_uvm)
set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc)
set(KOKKOS_INTERNAL_LAMBDA                  enable_lambda)
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- Create doc strings ----------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
set(tmpr "\n ")
|
||||
string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}")
|
||||
# This would be useful, but we use Foo_ENABLE mechanisms
|
||||
#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_DEVICES_DOCSTR "${KOKKOS_DEVICES_LIST}")
|
||||
#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_USE_TPLS_DOCSTR "${KOKKOS_USE_TPLS_LIST}")
|
||||
#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_CUDA_OPTIONS_DOCSTR "${KOKKOS_CUDA_OPTIONS_LIST}")
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- GENERAL OPTIONS -------------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Setting this variable to a value other than "None" can improve host
|
||||
# performance by turning on architecture specific code.
|
||||
# NOT SET is used to determine if the option is passed in. It is reset to
|
||||
# default "None" down below.
|
||||
set(KOKKOS_ARCH "NOT_SET" CACHE STRING
|
||||
"Optimize for specific host architecture. Options are: ${KOKKOS_INTERNAL_ARCH_DOCSTR}")
|
||||
|
||||
# Whether to build separate libraries or not
|
||||
set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.")
|
||||
|
||||
# Qthreads options.
|
||||
set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.")
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- KOKKOS_DEVICES --------------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
# Figure out default settings
|
||||
# Seed KOKKOS_INTERNAL_ENABLE_<DEVICE>_DEFAULT for every device. Inside
# Trilinos the defaults follow the Trilinos/TPL enable flags; otherwise only
# Serial is on by default.
if(Trilinos_ENABLE_Kokkos)
  set_kokkos_default_default(SERIAL ON)
  set_kokkos_default_default(PTHREAD OFF)
  if(TPL_ENABLE_QTHREAD)
    set_kokkos_default_default(QTHREADS ${TPL_ENABLE_QTHREAD})
  else()
    set_kokkos_default_default(QTHREADS OFF)
  endif()
  if(Trilinos_ENABLE_OpenMP)
    set_kokkos_default_default(OPENMP ${Trilinos_ENABLE_OpenMP})
  else()
    set_kokkos_default_default(OPENMP OFF)
  endif()
  if(TPL_ENABLE_CUDA)
    set_kokkos_default_default(CUDA ${TPL_ENABLE_CUDA})
  else()
    set_kokkos_default_default(CUDA OFF)
  endif()
  set_kokkos_default_default(ROCM OFF)
else()
  set_kokkos_default_default(SERIAL ON)
  set_kokkos_default_default(OPENMP OFF)
  set_kokkos_default_default(PTHREAD OFF)
  # Must be QTHREADS (with the trailing S): the option that consumes this
  # default is KOKKOS_ENABLE_QTHREADS.
  set_kokkos_default_default(QTHREADS OFF)
  set_kokkos_default_default(CUDA OFF)
  set_kokkos_default_default(ROCM OFF)
endif()
|
||||
|
||||
# Set which Kokkos backend to use.
|
||||
# These are the actual options that define the settings.
|
||||
# One cache option per backend, each initialised from the matching
# KOKKOS_INTERNAL_ENABLE_<DEVICE>_DEFAULT value.
set(KOKKOS_ENABLE_SERIAL ${KOKKOS_INTERNAL_ENABLE_SERIAL_DEFAULT}
    CACHE BOOL
    "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads.")
# NOTE(review): OPENMP is the only device option written with FORCE -- confirm
# whether that is intentional.
set(KOKKOS_ENABLE_OPENMP ${KOKKOS_INTERNAL_ENABLE_OPENMP_DEFAULT}
    CACHE BOOL "Enable OpenMP support in Kokkos." FORCE)
set(KOKKOS_ENABLE_PTHREAD ${KOKKOS_INTERNAL_ENABLE_PTHREAD_DEFAULT}
    CACHE BOOL "Enable Pthread support in Kokkos.")
set(KOKKOS_ENABLE_QTHREADS ${KOKKOS_INTERNAL_ENABLE_QTHREADS_DEFAULT}
    CACHE BOOL "Enable Qthreads support in Kokkos.")
set(KOKKOS_ENABLE_CUDA ${KOKKOS_INTERNAL_ENABLE_CUDA_DEFAULT}
    CACHE BOOL "Enable CUDA support in Kokkos.")
set(KOKKOS_ENABLE_ROCM ${KOKKOS_INTERNAL_ENABLE_ROCM_DEFAULT}
    CACHE BOOL "Enable ROCm support in Kokkos.")
|
||||
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- KOKKOS DEBUG and PROFILING --------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Debug related options enable compiler warnings
|
||||
|
||||
# Each option is declared as a pair: seed the internal default, then create
# the cache entry from it.
set_kokkos_default_default(DEBUG OFF)
set(KOKKOS_ENABLE_DEBUG ${KOKKOS_INTERNAL_ENABLE_DEBUG_DEFAULT}
    CACHE BOOL "Enable Kokkos Debug.")

# From Makefile.kokkos: Advanced Options:
#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print
set_kokkos_default_default(COMPILER_WARNINGS OFF)
set(KOKKOS_ENABLE_COMPILER_WARNINGS ${KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS_DEFAULT}
    CACHE BOOL "Enable compiler warnings.")

set_kokkos_default_default(DEBUG_DUALVIEW_MODIFY_CHECK OFF)
set(KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK ${KOKKOS_INTERNAL_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK_DEFAULT}
    CACHE BOOL "Enable dualview modify check.")

# Enable aggressive vectorization.
set_kokkos_default_default(AGGRESSIVE_VECTORIZATION OFF)
set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ${KOKKOS_INTERNAL_ENABLE_AGGRESSIVE_VECTORIZATION_DEFAULT}
    CACHE BOOL "Enable aggressive vectorization.")

# Enable profiling.
set_kokkos_default_default(PROFILING ON)
set(KOKKOS_ENABLE_PROFILING ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT}
    CACHE BOOL "Enable profiling.")

set_kokkos_default_default(PROFILING_LOAD_PRINT OFF)
set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT}
    CACHE BOOL "Enable profile load print.")
|
||||
|
||||
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- KOKKOS_USE_TPLS -------------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
# Enable hwloc library.
|
||||
# Figure out default:
|
||||
# hwloc defaults to ON only when building inside Trilinos with the hwloc TPL
# enabled.
if(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HWLOC)
  set_kokkos_default_default(HWLOC ON)
else()
  set_kokkos_default_default(HWLOC OFF)
endif()
set(KOKKOS_ENABLE_HWLOC ${KOKKOS_INTERNAL_ENABLE_HWLOC_DEFAULT}
    CACHE BOOL "Enable hwloc for better process placement.")
set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library. (kokkos tpl)")

# Enable memkind library.
set_kokkos_default_default(MEMKIND OFF)
set(KOKKOS_ENABLE_MEMKIND ${KOKKOS_INTERNAL_ENABLE_MEMKIND_DEFAULT}
    CACHE BOOL "Enable memkind. (kokkos tpl)")
set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library. (kokkos tpl)")

# Enable rt library: ON outside of Trilinos; inside Trilinos it follows
# TPL_ENABLE_LIBRT when that is defined and is OFF otherwise.
if(NOT Trilinos_ENABLE_Kokkos)
  set_kokkos_default_default(LIBRT ON)
elseif(DEFINED TPL_ENABLE_LIBRT)
  set_kokkos_default_default(LIBRT ${TPL_ENABLE_LIBRT})
else()
  set_kokkos_default_default(LIBRT OFF)
endif()
set(KOKKOS_ENABLE_LIBRT ${KOKKOS_INTERNAL_ENABLE_LIBRT_DEFAULT}
    CACHE BOOL "Enable librt for more precise timer. (kokkos tpl)")
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# CUDA options.
|
||||
# Set Defaults
|
||||
# The macro argument is the bare option name: the macro itself adds the
# KOKKOS_INTERNAL_ENABLE_ prefix and the _DEFAULT suffix. Passing a name that
# already ends in _DEFAULT would seed a *_DEFAULT_DEFAULT variable that the
# set(... CACHE ...) calls below never read.
set_kokkos_default_default(CUDA_LDG_INTRINSIC OFF)
set_kokkos_default_default(CUDA_UVM OFF)
set_kokkos_default_default(CUDA_RELOCATABLE_DEVICE_CODE OFF)
if(Trilinos_ENABLE_Kokkos)
  if(KOKKOS_ENABLE_CUDA)
    find_package(CUDA)
  endif()
  if(DEFINED CUDA_VERSION)
    if(CUDA_VERSION VERSION_GREATER "7.0")
      set_kokkos_default_default(CUDA_LAMBDA ON)
    else()
      set_kokkos_default_default(CUDA_LAMBDA OFF)
    endif()
  else()
    # No CUDA version known: still seed a default so the option below does
    # not get an empty value.
    set_kokkos_default_default(CUDA_LAMBDA OFF)
  endif()
else()
  set_kokkos_default_default(CUDA_LAMBDA OFF)
endif()

# Set actual options
set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.")
set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ${KOKKOS_INTERNAL_ENABLE_CUDA_LDG_INTRINSIC_DEFAULT} CACHE BOOL "Enable CUDA LDG. (cuda option)")
set(KOKKOS_ENABLE_CUDA_UVM ${KOKKOS_INTERNAL_ENABLE_CUDA_UVM_DEFAULT} CACHE BOOL "Enable CUDA unified virtual memory.")
set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE_DEFAULT} CACHE BOOL "Enable relocatable device code for CUDA. (cuda option)")
set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. (cuda option)")
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#----------------------- HOST ARCH AND LEGACY TRIBITS --------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# This defines the previous legacy TriBITS builds.
|
||||
# KOKKOS_ARCH still holding its "NOT_SET" sentinel means no architecture was
# passed in: reset it to "None" and, inside Trilinos, select the legacy build.
set(KOKKOS_LEGACY_TRIBITS False)
if("${KOKKOS_ARCH}" STREQUAL "NOT_SET")
  set(KOKKOS_ARCH "None")
  if(KOKKOS_HAS_TRILINOS)
    set(KOKKOS_LEGACY_TRIBITS True)
  endif()
endif()
# Report the decision (Trilinos builds only).
if(KOKKOS_HAS_TRILINOS AND KOKKOS_LEGACY_TRIBITS)
  message(STATUS "Using the legacy tribits build because KOKKOS_ARCH not set")
elseif(KOKKOS_HAS_TRILINOS)
  message(STATUS "NOT using the legacy tribits build because KOKKOS_ARCH *is* set")
endif()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#----------------------- Set CamelCase Options if they are not yet set ---------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
  string(TOUPPER ${opt} OPT )
  # Record the final value of the upper-case option in a *_INTERNAL cache entry.
  unset(KOKKOS_ENABLE_${OPT}_INTERNAL CACHE)
  set(KOKKOS_ENABLE_${OPT}_INTERNAL ${KOKKOS_ENABLE_${OPT}} CACHE BOOL INTERNAL)
  # Mirror the value into the CamelCase cache entry.
  if(DEFINED KOKKOS_ENABLE_${OPT})
    unset(Kokkos_ENABLE_${opt} CACHE)
    set(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}}
        CACHE BOOL "CamelCase Compatibility setting for KOKKOS_ENABLE_${OPT}")
  endif()
endforeach()
|
||||
|
||||
257
lib/kokkos/cmake/kokkos_settings.cmake
Normal file
257
lib/kokkos/cmake/kokkos_settings.cmake
Normal file
@ -0,0 +1,257 @@
|
||||
########################## NOTES ###############################################
|
||||
# This file's goal is to take CMake options found in kokkos_options.cmake but
|
||||
# possibly set from elsewhere
|
||||
# (see: trilinos/cmake/ProjectCompilerPostConfig.cmake)
|
||||
# using CMake idioms and map them onto the KOKKOS_SETTINGS variable that gets
|
||||
# passed to the kokkos makefile configuration:
|
||||
# make -f ${CMAKE_SOURCE_DIR}/core/src/Makefile ${KOKKOS_SETTINGS} build-makefile-cmake-kokkos
|
||||
# that generates KokkosCore_config.h and kokkos_generated_settings.cmake
|
||||
# To understand how to form KOKKOS_SETTINGS, see
|
||||
# <KOKKOS_PATH>/Makefile.kokkos
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- GENERAL OPTIONS -------------------------------
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Ensure that KOKKOS_ARCH is in the ARCH_LIST
|
||||
# Reject any requested architecture that is not listed in KOKKOS_ARCH_LIST.
foreach(arch ${KOKKOS_ARCH})
  list(FIND KOKKOS_ARCH_LIST ${arch} indx)
  if(indx LESS 0)  # list(FIND) yields -1 when the item is absent
    message(FATAL_ERROR "${arch} is not an accepted value for KOKKOS_ARCH."
      " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}")
  endif()
endforeach()

# KOKKOS_SETTINGS uses KOKKOS_ARCH as a comma-separated string.
string(REPLACE ";" "," KOKKOS_ARCH "${KOKKOS_ARCH}")
set(KOKKOS_ARCH ${KOKKOS_ARCH})
|
||||
|
||||
# From Makefile.kokkos: Options: yes,no
|
||||
# Translate the CMake boolean into the yes/no spelling used by the Makefile.
set(KOKKOS_DEBUG no)
if(${KOKKOS_ENABLE_DEBUG})
  set(KOKKOS_DEBUG yes)
endif()
|
||||
|
||||
#------------------------------- KOKKOS_DEVICES --------------------------------
|
||||
# Can have multiple devices
|
||||
# Collect every device whose KOKKOS_ENABLE_<DEVICE> option is on.
set(KOKKOS_DEVICESl)
foreach(devopt IN LISTS KOKKOS_DEVICES_LIST)
  string(TOUPPER ${devopt} devoptuc)
  if(${KOKKOS_ENABLE_${devoptuc}})
    list(APPEND KOKKOS_DEVICESl ${devopt})
  endif()
endforeach()
# The Makefile expects a comma-delimited list.
string(REPLACE ";" "," KOKKOS_DEVICES "${KOKKOS_DEVICESl}")
|
||||
|
||||
#------------------------------- KOKKOS_OPTIONS --------------------------------
|
||||
# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling
|
||||
#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print
|
||||
|
||||
# Build the list of Makefile option keywords from the KOKKOS_ENABLE_* options.
# The options are tested by variable name so that an empty value is simply
# treated as false.
set(KOKKOS_OPTIONSl)
if(KOKKOS_ENABLE_COMPILER_WARNINGS)
  list(APPEND KOKKOS_OPTIONSl compiler_warnings)
endif()
if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
  list(APPEND KOKKOS_OPTIONSl aggressive_vectorization)
endif()
if(NOT KOKKOS_ENABLE_PROFILING)
  # Profiling is switched off with the disable_profiling keyword.
  list(APPEND KOKKOS_OPTIONSl disable_profiling)
endif()
if(NOT KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK)
  list(APPEND KOKKOS_OPTIONSl disable_dualview_modify_check)
endif()
if(KOKKOS_ENABLE_PROFILING_LOAD_PRINT)
  list(APPEND KOKKOS_OPTIONSl enable_profile_load_print)
endif()
# List needs to be comma-delimited
string(REPLACE ";" "," KOKKOS_OPTIONS "${KOKKOS_OPTIONSl}")
|
||||
|
||||
|
||||
#------------------------------- KOKKOS_USE_TPLS -------------------------------
|
||||
# Construct the Makefile options
|
||||
# Collect the Makefile spelling of every enabled TPL.
set(KOKKOS_USE_TPLSl)
foreach(tplopt IN LISTS KOKKOS_USE_TPLS_LIST)
  if(${KOKKOS_ENABLE_${tplopt}})
    list(APPEND KOKKOS_USE_TPLSl ${KOKKOS_INTERNAL_${tplopt}})
  endif()
endforeach()
# The Makefile expects a comma-delimited list.
string(REPLACE ";" "," KOKKOS_USE_TPLS "${KOKKOS_USE_TPLSl}")
|
||||
|
||||
|
||||
#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
|
||||
# Construct the Makefile options
|
||||
# Reset the accumulator list that the loop appends to (the trailing-l
# variable); KOKKOS_CUDA_OPTIONS itself is assigned by string(REPLACE) below.
set(KOKKOS_CUDA_OPTIONSl)
foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST})
  if(KOKKOS_ENABLE_CUDA_${cudaopt})
    list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}})
  endif()
endforeach()
# List needs to be comma-delimited
string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
|
||||
|
||||
#------------------------------- PATH VARIABLES --------------------------------
|
||||
# Want makefile to use same executables specified which means modifying
|
||||
# the path so the $(shell ...) commands in the makefile see the right exec
|
||||
# Also, the Makefile's use FOO_PATH naming scheme for -I/-L construction
|
||||
#TODO: Makefile.kokkos allows this to be overwritten? ROCM_HCC_PATH
|
||||
|
||||
# KOKKOS_INTERNAL_PATHS collects <NAME>_PATH=<dir> settings (one list element
# per setting); addpathl collects the bin directories to prepend to PATH.
set(KOKKOS_INTERNAL_PATHS)
set(addpathl)
# The items are passed unquoted so the loop visits each name separately; a
# single quoted "a;b;c" argument would be visited once as a whole.
foreach(kvar CUDA QTHREADS ${KOKKOS_USE_TPLS_LIST})
  # Skip disabled components and components whose directory is unset or
  # empty (the *_DIR cache entries default to "").
  if(KOKKOS_ENABLE_${kvar} AND NOT "${KOKKOS_${kvar}_DIR}" STREQUAL "")
    list(APPEND KOKKOS_INTERNAL_PATHS "${kvar}_PATH=${KOKKOS_${kvar}_DIR}")
    if(IS_DIRECTORY "${KOKKOS_${kvar}_DIR}/bin")
      list(APPEND addpathl "${KOKKOS_${kvar}_DIR}/bin")
    endif()
  endif()
endforeach()
# The PATH environment variable is ':'-delimited
string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}")
|
||||
|
||||
|
||||
######################### SET KOKKOS_SETTINGS ##################################
|
||||
# Set the KOKKOS_SETTINGS String -- this is the primary communication with the
|
||||
# makefile configuration. See Makefile.kokkos
|
||||
|
||||
# Start with the three path settings.
set(KOKKOS_SETTINGS
    KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}
    KOKKOS_PATH=${KOKKOS_PATH}
    KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX})

# Form of KOKKOS_foo=$KOKKOS_foo -- only for variables that are defined and
# non-empty.
foreach(kvar ARCH DEVICES DEBUG OPTIONS CUDA_OPTIONS USE_TPLS)
  set(KOKKOS_VAR KOKKOS_${kvar})
  if(DEFINED ${KOKKOS_VAR} AND NOT "${${KOKKOS_VAR}}" STREQUAL "")
    set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_VAR}=${${KOKKOS_VAR}})
  endif()
endforeach()

# Form of VAR=VAL
#TODO: Makefile supports MPICH_CXX, OMPI_CXX as well
foreach(ovar CXX CXXFLAGS LDFLAGS)
  if(DEFINED ${ovar} AND NOT "${${ovar}}" STREQUAL "")
    set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${ovar}=${${ovar}})
  endif()
endforeach()

# Finally, do the paths
if(NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "")
  set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS})
endif()
if(NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
  set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} PATH=${KOKKOS_INTERNAL_ADDTOPATH}:\${PATH})
endif()

# Final form that gets passed to make
set(KOKKOS_SETTINGS env ${KOKKOS_SETTINGS})
|
||||
|
||||
|
||||
############################ PRINT CONFIGURE STATUS ############################
|
||||
|
||||
# Print a summary of the configuration when KOKKOS_CMAKE_VERBOSE is on.
if(KOKKOS_CMAKE_VERBOSE)
  message(STATUS "")
  message(STATUS "****************** Kokkos Settings ******************")
  message(STATUS "Execution Spaces")

  if(KOKKOS_ENABLE_CUDA)
    message(STATUS " Device Parallel: Cuda")
  else()
    message(STATUS " Device Parallel: None")
  endif()

  # Only the first enabled host-parallel backend is reported.
  if(KOKKOS_ENABLE_OPENMP)
    message(STATUS " Host Parallel: OpenMP")
  elseif(KOKKOS_ENABLE_PTHREAD)
    message(STATUS " Host Parallel: Pthread")
  elseif(KOKKOS_ENABLE_QTHREADS)
    message(STATUS " Host Parallel: Qthreads")
  else()
    message(STATUS " Host Parallel: None")
  endif()

  if(KOKKOS_ENABLE_SERIAL)
    message(STATUS " Host Serial: Serial")
  else()
    message(STATUS " Host Serial: None")
  endif()

  message(STATUS "")
  message(STATUS "Architectures:")
  message(STATUS " ${KOKKOS_ARCH}")

  message(STATUS "")
  message(STATUS "Enabled options")

  # General switches: print the name of each one that is on.
  foreach(flag
          KOKKOS_SEPARATE_LIBS
          KOKKOS_ENABLE_HWLOC
          KOKKOS_ENABLE_MEMKIND
          KOKKOS_ENABLE_DEBUG
          KOKKOS_ENABLE_PROFILING
          KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
    if(${flag})
      message(STATUS " ${flag}")
    endif()
  endforeach()

  # CUDA-specific switches are reported only when CUDA is enabled.
  if(KOKKOS_ENABLE_CUDA)
    foreach(flag
            KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
            KOKKOS_ENABLE_CUDA_UVM
            KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
            KOKKOS_ENABLE_CUDA_LAMBDA)
      if(${flag})
        message(STATUS " ${flag}")
      endif()
    endforeach()

    if(KOKKOS_CUDA_DIR)
      message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}")
    endif()
  endif()

  # Library locations that were provided.
  foreach(dirvar KOKKOS_QTHREADS_DIR KOKKOS_HWLOC_DIR KOKKOS_MEMKIND_DIR)
    if(${dirvar})
      message(STATUS " ${dirvar}: ${${dirvar}}")
    endif()
  endforeach()

  message(STATUS "")
  message(STATUS "Final kokkos settings variable:")
  message(STATUS " ${KOKKOS_SETTINGS}")

  message(STATUS "*****************************************************")
  message(STATUS "")
endif()
|
||||
@ -3,10 +3,6 @@ INCLUDE(CTest)
|
||||
|
||||
cmake_policy(SET CMP0054 NEW)
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME})
|
||||
project(KokkosCMake)
|
||||
ENDIF()
|
||||
|
||||
MESSAGE(WARNING "The project name is: ${PROJECT_NAME}")
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
|
||||
@ -46,26 +42,26 @@ MACRO(PREPEND_GLOBAL_SET VARNAME)
|
||||
GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}})
|
||||
ENDMACRO()
|
||||
|
||||
FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME)
|
||||
ASSERT_DEFINED(${VARNAME})
|
||||
IF (${VARNAME})
|
||||
SET(TMP ${${VARNAME}})
|
||||
LIST(REMOVE_DUPLICATES TMP)
|
||||
GLOBAL_SET(${VARNAME} ${TMP})
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
#FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME)
|
||||
# ASSERT_DEFINED(${VARNAME})
|
||||
# IF (${VARNAME})
|
||||
# SET(TMP ${${VARNAME}})
|
||||
# LIST(REMOVE_DUPLICATES TMP)
|
||||
# GLOBAL_SET(${VARNAME} ${TMP})
|
||||
# ENDIF()
|
||||
#ENDFUNCTION()
|
||||
|
||||
MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE)
|
||||
MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'")
|
||||
SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" )
|
||||
IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "")
|
||||
IF(${USER_OPTION_NAME})
|
||||
GLOBAL_SET(${MACRO_DEFINE_NAME} ON)
|
||||
ELSE()
|
||||
GLOBAL_SET(${MACRO_DEFINE_NAME} OFF)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDMACRO()
|
||||
#MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE)
|
||||
# MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'")
|
||||
# SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" )
|
||||
# IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "")
|
||||
# IF(${USER_OPTION_NAME})
|
||||
# GLOBAL_SET(${MACRO_DEFINE_NAME} ON)
|
||||
# ELSE()
|
||||
# GLOBAL_SET(${MACRO_DEFINE_NAME} OFF)
|
||||
# ENDIF()
|
||||
# ENDIF()
|
||||
#ENDMACRO()
|
||||
|
||||
FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE)
|
||||
|
||||
@ -77,17 +73,20 @@ FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE)
|
||||
|
||||
ENDFUNCTION()
|
||||
|
||||
MACRO(TRIBITS_ADD_DEBUG_OPTION)
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
${PROJECT_NAME}_ENABLE_DEBUG
|
||||
HAVE_${PROJECT_NAME_UC}_DEBUG
|
||||
"Enable a host of runtime debug checking."
|
||||
OFF
|
||||
)
|
||||
ENDMACRO()
|
||||
#MACRO(TRIBITS_ADD_DEBUG_OPTION)
|
||||
# TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
# ${PROJECT_NAME}_ENABLE_DEBUG
|
||||
# HAVE_${PROJECT_NAME_UC}_DEBUG
|
||||
# "Enable a host of runtime debug checking."
|
||||
# OFF
|
||||
# )
|
||||
#ENDMACRO()
|
||||
|
||||
|
||||
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
|
||||
message(STATUS "ProjectName: " ${PROJECT_NAME})
|
||||
message(STATUS "Tests: " ${${PROJECT_NAME}_ENABLE_TESTS})
|
||||
|
||||
IF(${${PROJECT_NAME}_ENABLE_TESTS})
|
||||
FOREACH(TEST_DIR ${ARGN})
|
||||
ADD_SUBDIRECTORY(${TEST_DIR})
|
||||
@ -387,17 +386,17 @@ FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME)
|
||||
|
||||
ENDFUNCTION()
|
||||
|
||||
MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE)
|
||||
GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE)
|
||||
INCLUDE("${TPL_FILE}")
|
||||
IF(TARGET TPL_LIB_${TPL_NAME})
|
||||
MESSAGE(STATUS "Found tpl library: ${TPL_NAME}")
|
||||
SET(TPL_ENABLE_${TPL_NAME} TRUE)
|
||||
ELSE()
|
||||
MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}")
|
||||
SET(TPL_ENABLE_${TPL_NAME} FALSE)
|
||||
ENDIF()
|
||||
ENDMACRO()
|
||||
#MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE)
|
||||
# GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE)
|
||||
# INCLUDE("${TPL_FILE}")
|
||||
# IF(TARGET TPL_LIB_${TPL_NAME})
|
||||
# MESSAGE(STATUS "Found tpl library: ${TPL_NAME}")
|
||||
# SET(TPL_ENABLE_${TPL_NAME} TRUE)
|
||||
# ELSE()
|
||||
# MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}")
|
||||
# SET(TPL_ENABLE_${TPL_NAME} FALSE)
|
||||
# ENDIF()
|
||||
#ENDMACRO()
|
||||
|
||||
MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE)
|
||||
IF(TYPE STREQUAL "REQUIRED")
|
||||
@ -475,6 +474,7 @@ MACRO(TRIBITS_SUBPACKAGE NAME)
|
||||
SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME})
|
||||
SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME})
|
||||
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
|
||||
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME})
|
||||
|
||||
@ -494,11 +494,11 @@ MACRO(TRIBITS_PACKAGE_DECL NAME)
|
||||
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
|
||||
|
||||
SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
|
||||
FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake")
|
||||
FOREACH(TPL_FILE ${TPLS_FILES})
|
||||
TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE})
|
||||
ENDFOREACH()
|
||||
#SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
|
||||
#FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake")
|
||||
#FOREACH(TPL_FILE ${TPLS_FILES})
|
||||
# TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE})
|
||||
#ENDFOREACH()
|
||||
|
||||
ENDMACRO()
|
||||
|
||||
|
||||
@ -11,3 +11,4 @@ tag: 2.03.13 date: 07:27:2017 master: da314444 develop: 29ccb58a
|
||||
tag: 2.04.00 date: 08:16:2017 master: 54eb75c0 develop: 32fb8ee1
|
||||
tag: 2.04.04 date: 09:11:2017 master: 2b7e9c20 develop: 51e7b25a
|
||||
tag: 2.04.11 date: 10:28:2017 master: 54a1330a develop: ed36c017
|
||||
tag: 2.5.00 date: 12:15:2017 master: dfe685f4 develop: ec7ad6d8
|
||||
|
||||
@ -39,6 +39,12 @@ cuda_args=""
|
||||
# Arguments for both NVCC and Host compiler
|
||||
shared_args=""
|
||||
|
||||
# Argument -c
|
||||
compile_arg=""
|
||||
|
||||
# Argument -o <obj>
|
||||
output_arg=""
|
||||
|
||||
# Linker arguments
|
||||
xlinker_args=""
|
||||
|
||||
@ -66,6 +72,7 @@ dry_run=0
|
||||
|
||||
# Skip NVCC compilation and use host compiler directly
|
||||
host_only=0
|
||||
host_only_args=""
|
||||
|
||||
# Enable workaround for CUDA 6.5 for pragma ident
|
||||
replace_pragma_ident=0
|
||||
@ -78,6 +85,14 @@ temp_dir=${TMPDIR:-/tmp}
|
||||
# Check if we have an optimization argument already
|
||||
optimization_applied=0
|
||||
|
||||
# Check if we have -std=c++X or --std=c++X already
|
||||
stdcxx_applied=0
|
||||
|
||||
# Run nvcc a second time to generate dependencies if needed
|
||||
depfile_separate=0
|
||||
depfile_output_arg=""
|
||||
depfile_target_arg=""
|
||||
|
||||
#echo "Arguments: $# $@"
|
||||
|
||||
while [ $# -gt 0 ]
|
||||
@ -109,12 +124,31 @@ do
|
||||
fi
|
||||
;;
|
||||
#Handle shared args (valid for both nvcc and the host compiler)
|
||||
-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
|
||||
-D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
|
||||
shared_args="$shared_args $1"
|
||||
;;
|
||||
#Handle shared args that have an argument
|
||||
-o|-MT)
|
||||
shared_args="$shared_args $1 $2"
|
||||
#Handle compilation argument
|
||||
-c)
|
||||
compile_arg="$1"
|
||||
;;
|
||||
#Handle output argument
|
||||
-o)
|
||||
output_arg="$output_arg $1 $2"
|
||||
shift
|
||||
;;
|
||||
# Handle depfile arguments. We map them to a separate call to nvcc.
|
||||
-MD|-MMD)
|
||||
depfile_separate=1
|
||||
host_only_args="$host_only_args $1"
|
||||
;;
|
||||
-MF)
|
||||
depfile_output_arg="-o $2"
|
||||
host_only_args="$host_only_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
-MT)
|
||||
depfile_target_arg="$1 $2"
|
||||
host_only_args="$host_only_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
#Handle known nvcc args
|
||||
@ -130,16 +164,25 @@ do
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
#Handle c++11 setting
|
||||
--std=c++11|-std=c++11)
|
||||
shared_args="$shared_args $1"
|
||||
#Handle c++11
|
||||
--std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z)
|
||||
if [ $stdcxx_applied -eq 1 ]; then
|
||||
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting"
|
||||
else
|
||||
shared_args="$shared_args $1"
|
||||
stdcxx_applied=1
|
||||
fi
|
||||
;;
|
||||
|
||||
#strip off -std=c++98: nvcc warns about it, and Tribits will place both -std=c++11 and -std=c++98
|
||||
-std=c++98|--std=c++98)
|
||||
;;
|
||||
#strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
|
||||
-pedantic|-Wpedantic|-ansi)
|
||||
;;
|
||||
#strip off -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C"
|
||||
-Woverloaded-virtual)
|
||||
;;
|
||||
#strip -Xcompiler because we add it
|
||||
-Xcompiler)
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
@ -190,7 +233,7 @@ do
|
||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
|
||||
*.dylib)
|
||||
@*|*.dylib)
|
||||
object_files="$object_files -Xlinker $1"
|
||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
@ -230,7 +273,7 @@ if [ $first_xcompiler_arg -eq 0 ]; then
|
||||
fi
|
||||
|
||||
#Compose host only command
|
||||
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
|
||||
host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
|
||||
|
||||
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
|
||||
if [ $replace_pragma_ident -eq 1 ]; then
|
||||
@ -262,10 +305,21 @@ else
|
||||
host_command="$host_command $object_files"
|
||||
fi
|
||||
|
||||
if [ $depfile_separate -eq 1 ]; then
|
||||
# run nvcc a second time to generate dependencies (without compiling)
|
||||
nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg"
|
||||
else
|
||||
nvcc_depfile_command=""
|
||||
fi
|
||||
|
||||
nvcc_command="$nvcc_command $compile_arg $output_arg"
|
||||
|
||||
#Print command for dryrun
|
||||
if [ $dry_run -eq 1 ]; then
|
||||
if [ $host_only -eq 1 ]; then
|
||||
echo $host_command
|
||||
elif [ -n "$nvcc_depfile_command" ]; then
|
||||
echo $nvcc_command "&&" $nvcc_depfile_command
|
||||
else
|
||||
echo $nvcc_command
|
||||
fi
|
||||
@ -275,6 +329,8 @@ fi
|
||||
#Run compilation command
|
||||
if [ $host_only -eq 1 ]; then
|
||||
$host_command
|
||||
elif [ -n "$nvcc_depfile_command" ]; then
|
||||
$nvcc_command && $nvcc_depfile_command
|
||||
else
|
||||
$nvcc_command
|
||||
fi
|
||||
|
||||
@ -16,12 +16,12 @@ if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
|
||||
MACHINE=white
|
||||
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
MACHINE=bowman
|
||||
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
elif [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name
|
||||
if [[ "$PROCESSOR" = "aarch64" ]]; then
|
||||
MACHINE=sullivan
|
||||
else
|
||||
MACHINE=shepard
|
||||
fi
|
||||
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
MACHINE=shepard
|
||||
elif [[ "$HOSTNAME" =~ apollo ]]; then
|
||||
MACHINE=apollo
|
||||
elif [[ "$HOSTNAME" =~ sullivan ]]; then
|
||||
@ -45,7 +45,8 @@ GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits
|
||||
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
CUDA_WARNING_FLAGS=""
|
||||
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
PGI_WARNING_FLAGS=""
|
||||
|
||||
# Default. Machine specific can override.
|
||||
DEBUG=False
|
||||
@ -61,6 +62,8 @@ SPOT_CHECK=False
|
||||
|
||||
PRINT_HELP=False
|
||||
OPT_FLAG=""
|
||||
CXX_FLAGS_EXTRA=""
|
||||
LD_FLAGS_EXTRA=""
|
||||
KOKKOS_OPTIONS=""
|
||||
|
||||
#
|
||||
@ -111,6 +114,12 @@ do
|
||||
--with-cuda-options*)
|
||||
KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
|
||||
;;
|
||||
--cxxflags-extra*)
|
||||
CXX_FLAGS_EXTRA="${key#*=}"
|
||||
;;
|
||||
--ldflags-extra*)
|
||||
LD_FLAGS_EXTRA="${key#*=}"
|
||||
;;
|
||||
--help*)
|
||||
PRINT_HELP=True
|
||||
;;
|
||||
@ -150,20 +159,18 @@ if [ "$MACHINE" = "sems" ]; then
|
||||
|
||||
if [ "$SPOT_CHECK" = "True" ]; then
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
else
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
@ -184,6 +191,7 @@ elif [ "$MACHINE" = "white" ]; then
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
|
||||
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
|
||||
CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6-BETA"
|
||||
|
||||
# Don't do pthread on white.
|
||||
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
@ -192,6 +200,7 @@ elif [ "$MACHINE" = "white" ]; then
|
||||
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
@ -210,8 +219,9 @@ elif [ "$MACHINE" = "bowman" ]; then
|
||||
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
COMPILERS=("intel/16.4.258 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.2.174 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/18.0.128 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
@ -241,13 +251,13 @@ elif [ "$MACHINE" = "shepard" ]; then
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
BASE_MODULE_LIST_INTEL="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
COMPILERS=("intel/17.4.196 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/18.0.128 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
@ -280,7 +290,7 @@ elif [ "$MACHINE" = "apollo" ]; then
|
||||
|
||||
if [ "$SPOT_CHECK" = "True" ]; then
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
|
||||
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
@ -292,14 +302,13 @@ elif [ "$MACHINE" = "apollo" ]; then
|
||||
COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"clang/4.0.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
|
||||
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
|
||||
"gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
@ -336,6 +345,8 @@ if [ "$PRINT_HELP" = "True" ]; then
|
||||
echo "--dry-run: Just print what would be executed"
|
||||
echo "--build-only: Just do builds, don't run anything"
|
||||
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
|
||||
echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS"
|
||||
echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS"
|
||||
echo "--arch=ARCHITECTURE: overwrite architecture flags"
|
||||
echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS"
|
||||
echo "--build-list=BUILD,BUILD,BUILD..."
|
||||
@ -361,14 +372,14 @@ if [ "$PRINT_HELP" = "True" ]; then
|
||||
echo " Run all gcc tests"
|
||||
echo " % test_all_sandia gcc"
|
||||
echo ""
|
||||
echo " Run all gcc/4.7.2 and all intel tests"
|
||||
echo " % test_all_sandia gcc/4.7.2 intel"
|
||||
echo " Run all gcc/4.8.4 and all intel tests"
|
||||
echo " % test_all_sandia gcc/4.8.4 intel"
|
||||
echo ""
|
||||
echo " Run all tests in debug"
|
||||
echo " % test_all_sandia --debug"
|
||||
echo ""
|
||||
echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds"
|
||||
echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial"
|
||||
echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds"
|
||||
echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial"
|
||||
echo ""
|
||||
echo "If you want to kill the tests, do:"
|
||||
echo " hit ctrl-z"
|
||||
@ -566,10 +577,15 @@ single_build_and_test() {
|
||||
if [[ "$build_type" = *debug* ]]; then
|
||||
local extra_args="$extra_args --debug"
|
||||
local cxxflags="-g $compiler_warning_flags"
|
||||
local ldflags="-g"
|
||||
else
|
||||
local cxxflags="$OPT_FLAG $compiler_warning_flags"
|
||||
local ldflags="${OPT_FLAG}"
|
||||
fi
|
||||
|
||||
local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}"
|
||||
local ldflags="${ldflags} ${LD_FLAGS_EXTRA}"
|
||||
|
||||
if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
|
||||
local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
|
||||
fi
|
||||
@ -586,7 +602,7 @@ single_build_and_test() {
|
||||
run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
|
||||
fi
|
||||
else
|
||||
run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
|
||||
run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
|
||||
local -i build_start_time=$(date +%s)
|
||||
run_cmd make -j 32 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
|
||||
local -i build_end_time=$(date +%s)
|
||||
|
||||
@ -2,7 +2,10 @@
|
||||
|
||||
TRIBITS_SUBPACKAGE(Containers)
|
||||
|
||||
ADD_SUBDIRECTORY(src)
|
||||
|
||||
IF(KOKKOS_HAS_TRILINOS)
|
||||
ADD_SUBDIRECTORY(src)
|
||||
ENDIF()
|
||||
|
||||
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
|
||||
TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)
|
||||
|
||||
@ -3,6 +3,14 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
# Outside of Trilinos, choose the Kokkos library the performance test links
# against: the single "kokkos" library by default, or "kokkoscore" when
# KOKKOS_SEPARATE_LIBS is set. Inside Trilinos TEST_LINK_TARGETS is not
# touched here.
if(NOT KOKKOS_HAS_TRILINOS)
  set(TEST_LINK_TARGETS kokkos)
  if(KOKKOS_SEPARATE_LIBS)
    set(TEST_LINK_TARGETS kokkoscore)
  endif()
endif()
|
||||
|
||||
SET(SOURCES
|
||||
TestMain.cpp
|
||||
TestCuda.cpp
|
||||
@ -24,7 +32,7 @@ TRIBITS_ADD_EXECUTABLE(
|
||||
PerfTestExec
|
||||
SOURCES ${SOURCES}
|
||||
COMM serial mpi
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
|
||||
TRIBITS_ADD_TEST(
|
||||
|
||||
@ -15,7 +15,8 @@ endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
LDFLAGS ?=
|
||||
override LDFLAGS += -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
|
||||
@ -180,8 +180,8 @@ void test_dynrankview_op_perf( const int par_size )
|
||||
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
const size_type dim2 = 90;
|
||||
const size_type dim3 = 30;
|
||||
const size_type dim_2 = 90;
|
||||
const size_type dim_3 = 30;
|
||||
|
||||
double elapsed_time_view = 0;
|
||||
double elapsed_time_compview = 0;
|
||||
@ -191,7 +191,7 @@ void test_dynrankview_op_perf( const int par_size )
|
||||
double elapsed_time_compdrview = 0;
|
||||
Kokkos::Timer timer;
|
||||
{
|
||||
Kokkos::View<double***,DeviceType> testview("testview",par_size,dim2,dim3);
|
||||
Kokkos::View<double***,DeviceType> testview("testview",par_size,dim_2,dim_3);
|
||||
typedef InitViewFunctor<DeviceType> FunctorType;
|
||||
|
||||
timer.reset();
|
||||
@ -220,7 +220,7 @@ void test_dynrankview_op_perf( const int par_size )
|
||||
std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl;
|
||||
}
|
||||
{
|
||||
Kokkos::View<double*******,DeviceType> testview("testview",par_size,dim2,dim3,1,1,1,1);
|
||||
Kokkos::View<double*******,DeviceType> testview("testview",par_size,dim_2,dim_3,1,1,1,1);
|
||||
typedef InitViewRank7Functor<DeviceType> FunctorType;
|
||||
|
||||
timer.reset();
|
||||
@ -231,7 +231,7 @@ void test_dynrankview_op_perf( const int par_size )
|
||||
std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl;
|
||||
}
|
||||
{
|
||||
Kokkos::DynRankView<double,DeviceType> testdrview("testdrview",par_size,dim2,dim3);
|
||||
Kokkos::DynRankView<double,DeviceType> testdrview("testdrview",par_size,dim_2,dim_3);
|
||||
typedef InitDynRankViewFunctor<DeviceType> FunctorType;
|
||||
|
||||
timer.reset();
|
||||
|
||||
@ -54,6 +54,7 @@
|
||||
#include <TestUnorderedMapPerformance.hpp>
|
||||
|
||||
#include <TestDynRankView.hpp>
|
||||
#include <TestScatterView.hpp>
|
||||
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
@ -122,6 +123,18 @@ TEST_F( openmp, unordered_map_performance_far)
|
||||
Perf::run_performance_tests<Kokkos::OpenMP,false>(base_file_name.str());
|
||||
}
|
||||
|
||||
// Performance run of ScatterView on the OpenMP backend with duplicated,
// non-atomic contributions.
TEST_F( openmp, scatter_view)
{
  using exec_space = Kokkos::OpenMP;
  using layout     = Kokkos::LayoutRight;

  const int launches = 10;           // forwarded as m
  const int entries  = 1000 * 1000;  // forwarded as n

  std::cout << "ScatterView data-duplicated test:\n";
  Perf::test_scatter_view< exec_space
                         , layout
                         , Kokkos::Experimental::ScatterDuplicated
                         , Kokkos::Experimental::ScatterNonAtomic
                         >( launches, entries );

  // The non-duplicated, atomic variant is disabled:
  //std::cout << "ScatterView atomics test:\n";
  //Perf::test_scatter_view<Kokkos::OpenMP, Kokkos::LayoutRight,
  //  Kokkos::Experimental::ScatterNonDuplicated,
  //  Kokkos::Experimental::ScatterAtomic>(10, 1000 * 1000);
}
|
||||
|
||||
} // namespace test
|
||||
#else
|
||||
void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {}
|
||||
|
||||
113
lib/kokkos/containers/performance_tests/TestScatterView.hpp
Normal file
113
lib/kokkos/containers/performance_tests/TestScatterView.hpp
Normal file
@ -0,0 +1,113 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
|
||||
#define KOKKOS_TEST_SCATTER_VIEW_HPP
|
||||
|
||||
#include <Kokkos_ScatterView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
namespace Perf {
|
||||
|
||||
template <typename ExecSpace, typename Layout, int duplication, int contribution>
|
||||
void test_scatter_view(int m, int n)
|
||||
{
|
||||
Kokkos::View<double *[3], Layout, ExecSpace> original_view("original_view", n);
|
||||
{
|
||||
auto scatter_view = Kokkos::Experimental::create_scatter_view
|
||||
< Kokkos::Experimental::ScatterSum
|
||||
, duplication
|
||||
, contribution
|
||||
> (original_view);
|
||||
Kokkos::Experimental::UniqueToken<
|
||||
ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global>
|
||||
unique_token{ExecSpace()};
|
||||
//auto internal_view = scatter_view.internal_view;
|
||||
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
|
||||
for (int foo = 0; foo < 5; ++foo) {
|
||||
{
|
||||
auto num_threads = unique_token.size();
|
||||
std::cout << "num_threads " << num_threads << '\n';
|
||||
Kokkos::View<double **[3], Layout, ExecSpace> hand_coded_duplicate_view("hand_coded_duplicate", num_threads, n);
|
||||
auto f2 = KOKKOS_LAMBDA(int i) {
|
||||
auto thread_id = unique_token.acquire();
|
||||
for (int j = 0; j < 10; ++j) {
|
||||
auto k = (i + j) % n;
|
||||
hand_coded_duplicate_view(thread_id, k, 0) += 4.2;
|
||||
hand_coded_duplicate_view(thread_id, k, 1) += 2.0;
|
||||
hand_coded_duplicate_view(thread_id, k, 2) += 1.0;
|
||||
}
|
||||
};
|
||||
Kokkos::Timer timer;
|
||||
timer.reset();
|
||||
for (int k = 0; k < m; ++k) {
|
||||
Kokkos::parallel_for(policy, f2, "hand_coded_duplicate_scatter_view_test");
|
||||
}
|
||||
auto t = timer.seconds();
|
||||
std::cout << "hand-coded test took " << t << " seconds\n";
|
||||
}
|
||||
{
|
||||
auto f = KOKKOS_LAMBDA(int i) {
|
||||
auto scatter_access = scatter_view.access();
|
||||
for (int j = 0; j < 10; ++j) {
|
||||
auto k = (i + j) % n;
|
||||
scatter_access(k, 0) += 4.2;
|
||||
scatter_access(k, 1) += 2.0;
|
||||
scatter_access(k, 2) += 1.0;
|
||||
}
|
||||
};
|
||||
Kokkos::Timer timer;
|
||||
timer.reset();
|
||||
for (int k = 0; k < m; ++k) {
|
||||
Kokkos::parallel_for(policy, f, "scatter_view_test");
|
||||
}
|
||||
auto t = timer.seconds();
|
||||
std::cout << "test took " << t << " seconds\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -6,26 +6,42 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
SET(HEADERS "")
|
||||
SET(SOURCES "")
|
||||
|
||||
SET(HEADERS_IMPL "")
|
||||
|
||||
FILE(GLOB HEADERS *.hpp)
|
||||
FILE(GLOB HEADERS_IMPL impl/*.hpp)
|
||||
FILE(GLOB SOURCES impl/*.cpp)
|
||||
|
||||
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
|
||||
|
||||
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
|
||||
if(KOKKOS_LEGACY_TRIBITS)
|
||||
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
kokkoscontainers
|
||||
HEADERS ${HEADERS}
|
||||
NOINSTALLHEADERS ${HEADERS_IMPL}
|
||||
SOURCES ${SOURCES}
|
||||
DEPLIBS
|
||||
)
|
||||
SET(HEADERS "")
|
||||
SET(SOURCES "")
|
||||
|
||||
SET(HEADERS_IMPL "")
|
||||
|
||||
FILE(GLOB HEADERS *.hpp)
|
||||
FILE(GLOB HEADERS_IMPL impl/*.hpp)
|
||||
FILE(GLOB SOURCES impl/*.cpp)
|
||||
|
||||
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
|
||||
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
kokkoscontainers
|
||||
HEADERS ${HEADERS}
|
||||
NOINSTALLHEADERS ${HEADERS_IMPL}
|
||||
SOURCES ${SOURCES}
|
||||
DEPLIBS
|
||||
)
|
||||
|
||||
else()
|
||||
|
||||
INSTALL (
|
||||
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
|
||||
DESTINATION ${TRILINOS_INCDIR}
|
||||
FILES_MATCHING PATTERN "*.hpp"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
kokkoscontainers
|
||||
SOURCES ${KOKKOS_CONTAINERS_SRCS}
|
||||
DEPLIBS
|
||||
)
|
||||
|
||||
endif()
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
999
lib/kokkos/containers/src/Kokkos_ScatterView.hpp
Normal file
999
lib/kokkos/containers/src/Kokkos_ScatterView.hpp
Normal file
@ -0,0 +1,999 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
/// \file Kokkos_ScatterView.hpp
|
||||
/// \brief Declaration and definition of Kokkos::ScatterView.
|
||||
///
|
||||
/// This header file declares and defines Kokkos::ScatterView and its
|
||||
/// related nonmember functions.
|
||||
|
||||
#ifndef KOKKOS_SCATTER_VIEW_HPP
|
||||
#define KOKKOS_SCATTER_VIEW_HPP
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <utility>
|
||||
|
||||
// Integer constants used as template arguments to configure a ScatterView.
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
//TODO: replace this enum with the Kokkos::Sum, etc reducers for parallel_reduce
|
||||
// Scatter operation selector; ScatterSum is the only operation defined here.
enum : int {
|
||||
ScatterSum,
|
||||
};
|
||||
|
||||
// Duplication mode selector.
// NOTE(review): presumably chooses whether the data is duplicated (e.g. per thread) - confirm against the ScatterView implementation.
enum : int {
|
||||
ScatterNonDuplicated = 0,
|
||||
ScatterDuplicated = 1
|
||||
};
|
||||
|
||||
// Contribution mode selector.
// NOTE(review): presumably chooses whether contributions use atomic updates - confirm against the ScatterView implementation.
enum : int {
|
||||
ScatterNonAtomic = 0,
|
||||
ScatterAtomic = 1
|
||||
};
|
||||
|
||||
}} // Kokkos::Experimental
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
namespace Experimental {
|
||||
|
||||
// Trait giving the default duplication mode of an execution space as a nested `value`.
// The primary template is only declared here; specializations supply the values.
template <typename ExecSpace>
|
||||
struct DefaultDuplication;
|
||||
|
||||
// Trait giving the default contribution mode for an (execution space, duplication mode) pair as a nested `value`.
// The primary template is only declared here; specializations supply the values.
template <typename ExecSpace, int duplication>
|
||||
struct DefaultContribution;
|
||||
|
||||
// Defaults for Kokkos::Serial (compiled only when KOKKOS_ENABLE_SERIAL is defined):
// non-duplicated, and non-atomic contribution for either duplication mode.
#ifdef KOKKOS_ENABLE_SERIAL
|
||||
template <>
|
||||
struct DefaultDuplication<Kokkos::Serial> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::Serial, Kokkos::Experimental::ScatterNonDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::Serial, Kokkos::Experimental::ScatterDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
template <>
|
||||
struct DefaultDuplication<Kokkos::OpenMP> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterDuplicated };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::OpenMP, Kokkos::Experimental::ScatterNonDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterAtomic };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::OpenMP, Kokkos::Experimental::ScatterDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ENABLE_THREADS
|
||||
template <>
|
||||
struct DefaultDuplication<Kokkos::Threads> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterDuplicated };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::Threads, Kokkos::Experimental::ScatterNonDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterAtomic };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::Threads, Kokkos::Experimental::ScatterDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
template <>
|
||||
struct DefaultDuplication<Kokkos::Cuda> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::Cuda, Kokkos::Experimental::ScatterNonDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterAtomic };
|
||||
};
|
||||
template <>
|
||||
struct DefaultContribution<Kokkos::Cuda, Kokkos::Experimental::ScatterDuplicated> {
|
||||
enum : int { value = Kokkos::Experimental::ScatterAtomic };
|
||||
};
|
||||
#endif
|
||||
|
||||
/* ScatterValue is the object returned by the access operator() of ScatterAccess.
   Similar to the value returned by an Atomic View, it wraps a reference to the
   target element and offers convenient operator+= and operator-=; the atomic
   specialization implements them with Kokkos::atomic_add. */
|
||||
template <typename ValueType, int Op, int contribution>
|
||||
struct ScatterValue;
|
||||
|
||||
template <typename ValueType>
|
||||
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonAtomic> {
|
||||
public:
|
||||
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {}
|
||||
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : value( other.value ) {}
|
||||
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
|
||||
value += rhs;
|
||||
}
|
||||
KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
|
||||
value -= rhs;
|
||||
}
|
||||
private:
|
||||
ValueType& value;
|
||||
};
|
||||
|
||||
template <typename ValueType>
|
||||
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterAtomic> {
|
||||
public:
|
||||
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {}
|
||||
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
|
||||
Kokkos::atomic_add(&value, rhs);
|
||||
}
|
||||
KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) {
|
||||
Kokkos::atomic_add(&value, -rhs);
|
||||
}
|
||||
private:
|
||||
ValueType& value;
|
||||
};
|
||||
|
||||
/* DuplicatedDataType, given a View DataType, will create a new DataType
|
||||
that has a new runtime dimension which becomes the largest-stride dimension.
|
||||
In the case of LayoutLeft, due to the limitation induced by the design of DataType
|
||||
itself, it must convert any existing compile-time dimensions into runtime dimensions. */
|
||||
template <typename T, typename Layout>
|
||||
struct DuplicatedDataType;
|
||||
|
||||
template <typename T>
|
||||
struct DuplicatedDataType<T, Kokkos::LayoutRight> {
|
||||
typedef T* value_type; // For LayoutRight, add a star all the way on the left
|
||||
};
|
||||
|
||||
template <typename T, size_t N>
|
||||
struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
|
||||
typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type value_type[N];
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
|
||||
typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type value_type[];
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct DuplicatedDataType<T*, Kokkos::LayoutRight> {
|
||||
typedef typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type* value_type;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
|
||||
typedef T* value_type;
|
||||
};
|
||||
|
||||
template <typename T, size_t N>
|
||||
struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
|
||||
typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type* value_type;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
|
||||
typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type* value_type;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct DuplicatedDataType<T*, Kokkos::LayoutLeft> {
|
||||
typedef typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type* value_type;
|
||||
};
|
||||
|
||||
/* Slice is just responsible for stuffing the correct number of Kokkos::ALL
|
||||
arguments on the correct side of the index in a call to subview() to get a
|
||||
subview where the index specified is the largest-stride one. */
|
||||
template <typename Layout, int rank, typename V, typename ... Args>
|
||||
struct Slice {
|
||||
typedef Slice<Layout, rank - 1, V, Kokkos::Impl::ALL_t, Args...> next;
|
||||
typedef typename next::value_type value_type;
|
||||
|
||||
static
|
||||
value_type get(V const& src, const size_t i, Args ... args) {
|
||||
return next::get(src, i, Kokkos::ALL, args...);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename V, typename ... Args>
|
||||
struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
|
||||
typedef typename Kokkos::Impl::ViewMapping
|
||||
< void
|
||||
, V
|
||||
, const size_t
|
||||
, Args ...
|
||||
>::type value_type;
|
||||
static
|
||||
value_type get(V const& src, const size_t i, Args ... args) {
|
||||
return Kokkos::subview(src, i, args...);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename V, typename ... Args>
|
||||
struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
|
||||
typedef typename Kokkos::Impl::ViewMapping
|
||||
< void
|
||||
, V
|
||||
, Args ...
|
||||
, const size_t
|
||||
>::type value_type;
|
||||
static
|
||||
value_type get(V const& src, const size_t i, Args ... args) {
|
||||
return Kokkos::subview(src, args..., i);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ExecSpace, typename ValueType, int Op>
|
||||
struct ReduceDuplicates;
|
||||
|
||||
template <typename ExecSpace, typename ValueType, int Op>
|
||||
struct ReduceDuplicatesBase {
|
||||
typedef ReduceDuplicates<ExecSpace, ValueType, Op> Derived;
|
||||
ValueType const* src;
|
||||
ValueType* dst;
|
||||
size_t stride;
|
||||
size_t start;
|
||||
size_t n;
|
||||
ReduceDuplicatesBase(ValueType const* src_in, ValueType* dest_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name)
|
||||
: src(src_in)
|
||||
, dst(dest_in)
|
||||
, stride(stride_in)
|
||||
, start(start_in)
|
||||
, n(n_in)
|
||||
{
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
uint64_t kpID = 0;
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID);
|
||||
}
|
||||
#endif
|
||||
typedef RangePolicy<ExecSpace, size_t> policy_type;
|
||||
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
|
||||
const closure_type closure(*(static_cast<Derived*>(this)), policy_type(0, stride));
|
||||
closure.execute();
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::endParallelFor(kpID);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ExecSpace, typename ValueType>
|
||||
struct ReduceDuplicates<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum> :
|
||||
public ReduceDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum>
|
||||
{
|
||||
typedef ReduceDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum> Base;
|
||||
ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name):
|
||||
Base(src_in, dst_in, stride_in, start_in, n_in, name)
|
||||
{}
|
||||
KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
|
||||
for (size_t j = Base::start; j < Base::n; ++j) {
|
||||
Base::dst[i] += Base::src[i + Base::stride * j];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ExecSpace, typename ValueType, int Op>
|
||||
struct ResetDuplicates;
|
||||
|
||||
template <typename ExecSpace, typename ValueType, int Op>
|
||||
struct ResetDuplicatesBase {
|
||||
typedef ResetDuplicates<ExecSpace, ValueType, Op> Derived;
|
||||
ValueType* data;
|
||||
ResetDuplicatesBase(ValueType* data_in, size_t size_in, std::string const& name)
|
||||
: data(data_in)
|
||||
{
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
uint64_t kpID = 0;
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID);
|
||||
}
|
||||
#endif
|
||||
typedef RangePolicy<ExecSpace, size_t> policy_type;
|
||||
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
|
||||
const closure_type closure(*(static_cast<Derived*>(this)), policy_type(0, size_in));
|
||||
closure.execute();
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
Kokkos::Profiling::endParallelFor(kpID);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ExecSpace, typename ValueType>
|
||||
struct ResetDuplicates<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum> :
|
||||
public ResetDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum>
|
||||
{
|
||||
typedef ResetDuplicatesBase<ExecSpace, ValueType, Kokkos::Experimental::ScatterSum> Base;
|
||||
ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name):
|
||||
Base(data_in, size_in, name)
|
||||
{}
|
||||
KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
|
||||
Base::data[i] = Kokkos::reduction_identity<ValueType>::sum();
|
||||
}
|
||||
};
|
||||
|
||||
}}} // Kokkos::Impl::Experimental
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
template <typename DataType
|
||||
,typename Layout = Kokkos::DefaultExecutionSpace::array_layout
|
||||
,typename ExecSpace = Kokkos::DefaultExecutionSpace
|
||||
,int Op = ScatterSum
|
||||
,int duplication = Kokkos::Impl::Experimental::DefaultDuplication<ExecSpace>::value
|
||||
,int contribution = Kokkos::Impl::Experimental::DefaultContribution<ExecSpace, duplication>::value
|
||||
>
|
||||
class ScatterView;
|
||||
|
||||
template <typename DataType
|
||||
,int Op
|
||||
,typename ExecSpace
|
||||
,typename Layout
|
||||
,int duplication
|
||||
,int contribution
|
||||
,int override_contribution
|
||||
>
|
||||
class ScatterAccess;
|
||||
|
||||
// non-duplicated implementation
|
||||
template <typename DataType
|
||||
,int Op
|
||||
,typename ExecSpace
|
||||
,typename Layout
|
||||
,int contribution
|
||||
>
|
||||
class ScatterView<DataType
|
||||
,Layout
|
||||
,ExecSpace
|
||||
,Op
|
||||
,ScatterNonDuplicated
|
||||
,contribution>
|
||||
{
|
||||
public:
|
||||
typedef Kokkos::View<DataType, Layout, ExecSpace> original_view_type;
|
||||
typedef typename original_view_type::value_type original_value_type;
|
||||
typedef typename original_view_type::reference_type original_reference_type;
|
||||
friend class ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, ScatterNonAtomic>;
|
||||
friend class ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, ScatterAtomic>;
|
||||
|
||||
ScatterView()
|
||||
{
|
||||
}
|
||||
|
||||
template <typename RT, typename ... RP>
|
||||
ScatterView(View<RT, RP...> const& original_view)
|
||||
: internal_view(original_view)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ... Dims>
|
||||
ScatterView(std::string const& name, Dims ... dims)
|
||||
: internal_view(name, dims ...)
|
||||
{
|
||||
}
|
||||
|
||||
template <int override_contrib = contribution>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, override_contrib>
|
||||
access() const {
|
||||
return ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, override_contrib>{*this};
|
||||
}
|
||||
|
||||
original_view_type subview() const {
|
||||
return internal_view;
|
||||
}
|
||||
|
||||
template <typename DT, typename ... RP>
|
||||
void contribute_into(View<DT, RP...> const& dest) const
|
||||
{
|
||||
typedef View<DT, RP...> dest_type;
|
||||
static_assert(std::is_same<
|
||||
typename dest_type::array_layout,
|
||||
Layout>::value,
|
||||
"ScatterView contribute destination has different layout");
|
||||
static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||
typename ExecSpace::memory_space,
|
||||
typename dest_type::memory_space>::value,
|
||||
"ScatterView contribute destination memory space not accessible");
|
||||
if (dest.data() == internal_view.data()) return;
|
||||
Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data(),
|
||||
dest.data(),
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
internal_view.label());
|
||||
}
|
||||
|
||||
void reset() {
|
||||
Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data(),
|
||||
internal_view.size(),
|
||||
internal_view.label());
|
||||
}
|
||||
template <typename DT, typename ... RP>
|
||||
void reset_except(View<DT, RP...> const& view) {
|
||||
if (view.data() != internal_view.data()) reset();
|
||||
}
|
||||
|
||||
void resize(const size_t n0 = 0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0,
|
||||
const size_t n7 = 0) {
|
||||
::Kokkos::resize(internal_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||
}
|
||||
|
||||
void realloc(const size_t n0 = 0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0,
|
||||
const size_t n7 = 0) {
|
||||
::Kokkos::realloc(internal_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||
}
|
||||
|
||||
protected:
|
||||
template <typename ... Args>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
original_reference_type at(Args ... args) const {
|
||||
return internal_view(args...);
|
||||
}
|
||||
private:
|
||||
typedef original_view_type internal_view_type;
|
||||
internal_view_type internal_view;
|
||||
};
|
||||
|
||||
template <typename DataType
|
||||
,int Op
|
||||
,typename ExecSpace
|
||||
,typename Layout
|
||||
,int contribution
|
||||
,int override_contribution
|
||||
>
|
||||
class ScatterAccess<DataType
|
||||
,Op
|
||||
,ExecSpace
|
||||
,Layout
|
||||
,ScatterNonDuplicated
|
||||
,contribution
|
||||
,override_contribution>
|
||||
{
|
||||
public:
|
||||
typedef ScatterView<DataType, Layout, ExecSpace, Op, ScatterNonDuplicated, contribution> view_type;
|
||||
typedef typename view_type::original_value_type original_value_type;
|
||||
typedef Kokkos::Impl::Experimental::ScatterValue<
|
||||
original_value_type, Op, override_contribution> value_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ScatterAccess(view_type const& view_in)
|
||||
: view(view_in)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ... Args>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
value_type operator()(Args ... args) const {
|
||||
return view.at(args...);
|
||||
}
|
||||
|
||||
template <typename Arg>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<view_type::original_view_type::rank == 1 &&
|
||||
std::is_integral<Arg>::value, value_type>::type
|
||||
operator[](Arg arg) const {
|
||||
return view.at(arg);
|
||||
}
|
||||
|
||||
private:
|
||||
view_type const& view;
|
||||
};
|
||||
|
||||
// duplicated implementation
|
||||
// LayoutLeft and LayoutRight are different enough that we'll just specialize each
|
||||
|
||||
template <typename DataType
|
||||
,int Op
|
||||
,typename ExecSpace
|
||||
,int contribution
|
||||
>
|
||||
class ScatterView<DataType
|
||||
,Kokkos::LayoutRight
|
||||
,ExecSpace
|
||||
,Op
|
||||
,ScatterDuplicated
|
||||
,contribution>
|
||||
{
|
||||
public:
|
||||
typedef Kokkos::View<DataType, Kokkos::LayoutRight, ExecSpace> original_view_type;
|
||||
typedef typename original_view_type::value_type original_value_type;
|
||||
typedef typename original_view_type::reference_type original_reference_type;
|
||||
friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, ScatterNonAtomic>;
|
||||
friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, ScatterAtomic>;
|
||||
typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<DataType, Kokkos::LayoutRight> data_type_info;
|
||||
typedef typename data_type_info::value_type internal_data_type;
|
||||
typedef Kokkos::View<internal_data_type, Kokkos::LayoutRight, ExecSpace> internal_view_type;
|
||||
|
||||
ScatterView()
|
||||
{
|
||||
}
|
||||
|
||||
template <typename RT, typename ... RP >
|
||||
ScatterView(View<RT, RP...> const& original_view)
|
||||
: unique_token()
|
||||
, internal_view(Kokkos::ViewAllocateWithoutInitializing(
|
||||
std::string("duplicated_") + original_view.label()),
|
||||
unique_token.size(),
|
||||
original_view.extent(0),
|
||||
original_view.extent(1),
|
||||
original_view.extent(2),
|
||||
original_view.extent(3),
|
||||
original_view.extent(4),
|
||||
original_view.extent(5),
|
||||
original_view.extent(6))
|
||||
{
|
||||
reset();
|
||||
}
|
||||
|
||||
template <typename ... Dims>
|
||||
ScatterView(std::string const& name, Dims ... dims)
|
||||
: internal_view(Kokkos::ViewAllocateWithoutInitializing(name), unique_token.size(), dims ...)
|
||||
{
|
||||
reset();
|
||||
}
|
||||
|
||||
template <int override_contribution = contribution>
|
||||
inline
|
||||
ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, override_contribution>
|
||||
access() const {
|
||||
return ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, override_contribution>{*this};
|
||||
}
|
||||
|
||||
typename Kokkos::Impl::Experimental::Slice<
|
||||
Kokkos::LayoutRight, internal_view_type::rank, internal_view_type>::value_type
|
||||
subview() const
|
||||
{
|
||||
return Kokkos::Impl::Experimental::Slice<
|
||||
Kokkos::LayoutRight, internal_view_type::Rank, internal_view_type>::get(internal_view, 0);
|
||||
}
|
||||
|
||||
template <typename DT, typename ... RP>
|
||||
void contribute_into(View<DT, RP...> const& dest) const
|
||||
{
|
||||
typedef View<DT, RP...> dest_type;
|
||||
static_assert(std::is_same<
|
||||
typename dest_type::array_layout,
|
||||
Kokkos::LayoutRight>::value,
|
||||
"ScatterView deep_copy destination has different layout");
|
||||
static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||
typename ExecSpace::memory_space,
|
||||
typename dest_type::memory_space>::value,
|
||||
"ScatterView deep_copy destination memory space not accessible");
|
||||
size_t strides[8];
|
||||
internal_view.stride(strides);
|
||||
bool is_equal = (dest.data() == internal_view.data());
|
||||
size_t start = is_equal ? 1 : 0;
|
||||
Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data(),
|
||||
dest.data(),
|
||||
strides[0],
|
||||
start,
|
||||
internal_view.extent(0),
|
||||
internal_view.label());
|
||||
}
|
||||
|
||||
void reset() {
|
||||
Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data(),
|
||||
internal_view.size(),
|
||||
internal_view.label());
|
||||
}
|
||||
template <typename DT, typename ... RP>
|
||||
void reset_except(View<DT, RP...> const& view) {
|
||||
if (view.data() != internal_view.data()) {
|
||||
reset();
|
||||
return;
|
||||
}
|
||||
Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data() + view.size(),
|
||||
internal_view.size() - view.size(),
|
||||
internal_view.label());
|
||||
}
|
||||
|
||||
void resize(const size_t n0 = 0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0) {
|
||||
::Kokkos::resize(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6);
|
||||
}
|
||||
|
||||
void realloc(const size_t n0 = 0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0) {
|
||||
::Kokkos::realloc(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6);
|
||||
}
|
||||
|
||||
protected:
|
||||
template <typename ... Args>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
original_reference_type at(int rank, Args ... args) const {
|
||||
return internal_view(rank, args...);
|
||||
}
|
||||
|
||||
protected:
|
||||
typedef Kokkos::Experimental::UniqueToken<
|
||||
ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;
|
||||
|
||||
unique_token_type unique_token;
|
||||
internal_view_type internal_view;
|
||||
};
|
||||
|
||||
template <typename DataType
|
||||
,int Op
|
||||
,typename ExecSpace
|
||||
,int contribution
|
||||
>
|
||||
class ScatterView<DataType
|
||||
,Kokkos::LayoutLeft
|
||||
,ExecSpace
|
||||
,Op
|
||||
,ScatterDuplicated
|
||||
,contribution>
|
||||
{
|
||||
public:
|
||||
typedef Kokkos::View<DataType, Kokkos::LayoutLeft, ExecSpace> original_view_type;
|
||||
typedef typename original_view_type::value_type original_value_type;
|
||||
typedef typename original_view_type::reference_type original_reference_type;
|
||||
friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, ScatterNonAtomic>;
|
||||
friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, ScatterAtomic>;
|
||||
typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<DataType, Kokkos::LayoutLeft> data_type_info;
|
||||
typedef typename data_type_info::value_type internal_data_type;
|
||||
typedef Kokkos::View<internal_data_type, Kokkos::LayoutLeft, ExecSpace> internal_view_type;
|
||||
|
||||
ScatterView()
|
||||
{
|
||||
}
|
||||
|
||||
template <typename RT, typename ... RP >
|
||||
ScatterView(View<RT, RP...> const& original_view)
|
||||
: unique_token()
|
||||
{
|
||||
size_t arg_N[8] = {
|
||||
original_view.extent(0),
|
||||
original_view.extent(1),
|
||||
original_view.extent(2),
|
||||
original_view.extent(3),
|
||||
original_view.extent(4),
|
||||
original_view.extent(5),
|
||||
original_view.extent(6),
|
||||
0
|
||||
};
|
||||
arg_N[internal_view_type::rank - 1] = unique_token.size();
|
||||
internal_view = internal_view_type(
|
||||
Kokkos::ViewAllocateWithoutInitializing(
|
||||
std::string("duplicated_") + original_view.label()),
|
||||
arg_N[0], arg_N[1], arg_N[2], arg_N[3],
|
||||
arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
|
||||
reset();
|
||||
}
|
||||
|
||||
template <typename ... Dims>
|
||||
ScatterView(std::string const& name, Dims ... dims)
|
||||
: internal_view(Kokkos::ViewAllocateWithoutInitializing(name), dims ..., unique_token.size())
|
||||
{
|
||||
reset();
|
||||
}
|
||||
|
||||
template <int override_contribution = contribution>
|
||||
inline
|
||||
ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, override_contribution>
|
||||
access() const {
|
||||
return ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, override_contribution>{*this};
|
||||
}
|
||||
|
||||
typename Kokkos::Impl::Experimental::Slice<
|
||||
Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::value_type
|
||||
subview() const
|
||||
{
|
||||
return Kokkos::Impl::Experimental::Slice<
|
||||
Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::get(internal_view, 0);
|
||||
}
|
||||
|
||||
template <typename ... RP>
|
||||
void contribute_into(View<DataType, RP...> const& dest) const
|
||||
{
|
||||
typedef View<DataType, RP...> dest_type;
|
||||
static_assert(std::is_same<
|
||||
typename dest_type::array_layout,
|
||||
Kokkos::LayoutLeft>::value,
|
||||
"ScatterView deep_copy destination has different layout");
|
||||
static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||
typename ExecSpace::memory_space,
|
||||
typename dest_type::memory_space>::value,
|
||||
"ScatterView deep_copy destination memory space not accessible");
|
||||
size_t strides[8];
|
||||
internal_view.stride(strides);
|
||||
size_t stride = strides[internal_view_type::rank - 1];
|
||||
auto extent = internal_view.extent(
|
||||
internal_view_type::rank - 1);
|
||||
bool is_equal = (dest.data() == internal_view.data());
|
||||
size_t start = is_equal ? 1 : 0;
|
||||
Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data(),
|
||||
dest.data(),
|
||||
stride,
|
||||
start,
|
||||
extent,
|
||||
internal_view.label());
|
||||
}
|
||||
|
||||
void reset() {
|
||||
Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data(),
|
||||
internal_view.size(),
|
||||
internal_view.label());
|
||||
}
|
||||
template <typename DT, typename ... RP>
|
||||
void reset_except(View<DT, RP...> const& view) {
|
||||
if (view.data() != internal_view.data()) {
|
||||
reset();
|
||||
return;
|
||||
}
|
||||
Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
|
||||
internal_view.data() + view.size(),
|
||||
internal_view.size() - view.size(),
|
||||
internal_view.label());
|
||||
}
|
||||
|
||||
void resize(const size_t n0 = 0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0) {
|
||||
|
||||
size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0};
|
||||
const int i = internal_view.rank-1;
|
||||
arg_N[i] = unique_token.size();
|
||||
|
||||
::Kokkos::resize(internal_view,
|
||||
arg_N[0], arg_N[1], arg_N[2], arg_N[3],
|
||||
arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
|
||||
}
|
||||
|
||||
void realloc(const size_t n0 = 0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0) {
|
||||
|
||||
size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0};
|
||||
const int i = internal_view.rank-1;
|
||||
arg_N[i] = unique_token.size();
|
||||
|
||||
::Kokkos::realloc(internal_view,
|
||||
arg_N[0], arg_N[1], arg_N[2], arg_N[3],
|
||||
arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
|
||||
}
|
||||
|
||||
protected:
|
||||
template <typename ... Args>
|
||||
inline original_reference_type at(int thread_id, Args ... args) const {
|
||||
return internal_view(args..., thread_id);
|
||||
}
|
||||
|
||||
protected:
|
||||
typedef Kokkos::Experimental::UniqueToken<
|
||||
ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;
|
||||
|
||||
unique_token_type unique_token;
|
||||
internal_view_type internal_view;
|
||||
};
|
||||
|
||||
|
||||
/* This object has to be separate in order to store the thread ID, which cannot
|
||||
be obtained until one is inside a parallel construct, and may be relatively
|
||||
expensive to obtain at every contribution
|
||||
(calls a non-inlined function, looks up a thread-local variable).
|
||||
Due to the expense, it is sensible to query it at most once per parallel iterate
|
||||
(ideally once per thread, but parallel_for doesn't expose that)
|
||||
and then store it in a stack variable.
|
||||
ScatterAccess serves as a non-const object on the stack which can store the thread ID */
|
||||
|
||||
template <typename DataType
|
||||
,int Op
|
||||
,typename ExecSpace
|
||||
,typename Layout
|
||||
,int contribution
|
||||
,int override_contribution
|
||||
>
|
||||
class ScatterAccess<DataType
|
||||
,Op
|
||||
,ExecSpace
|
||||
,Layout
|
||||
,ScatterDuplicated
|
||||
,contribution
|
||||
,override_contribution>
|
||||
{
|
||||
public:
|
||||
typedef ScatterView<DataType, Layout, ExecSpace, Op, ScatterDuplicated, contribution> view_type;
|
||||
typedef typename view_type::original_value_type original_value_type;
|
||||
typedef Kokkos::Impl::Experimental::ScatterValue<
|
||||
original_value_type, Op, override_contribution> value_type;
|
||||
|
||||
inline ScatterAccess(view_type const& view_in)
|
||||
: view(view_in)
|
||||
, thread_id(view_in.unique_token.acquire()) {
|
||||
}
|
||||
|
||||
inline ~ScatterAccess() {
|
||||
if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id);
|
||||
}
|
||||
|
||||
template <typename ... Args>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
value_type operator()(Args ... args) const {
|
||||
return view.at(thread_id, args...);
|
||||
}
|
||||
|
||||
template <typename Arg>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<view_type::original_view_type::rank == 1 &&
|
||||
std::is_integral<Arg>::value, value_type>::type
|
||||
operator[](Arg arg) const {
|
||||
return view.at(thread_id, arg);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
view_type const& view;
|
||||
|
||||
// simplify RAII by disallowing copies
|
||||
ScatterAccess(ScatterAccess const& other) = delete;
|
||||
ScatterAccess& operator=(ScatterAccess const& other) = delete;
|
||||
ScatterAccess& operator=(ScatterAccess&& other) = delete;
|
||||
|
||||
public:
|
||||
// do need to allow moves though, for the common
|
||||
// auto b = a.access();
|
||||
// that assignments turns into a move constructor call
|
||||
inline ScatterAccess(ScatterAccess&& other)
|
||||
: view(other.view)
|
||||
, thread_id(other.thread_id)
|
||||
{
|
||||
other.thread_id = ~thread_id_type(0);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typedef typename view_type::unique_token_type unique_token_type;
|
||||
typedef typename unique_token_type::size_type thread_id_type;
|
||||
thread_id_type thread_id;
|
||||
};
|
||||
|
||||
template <int Op = Kokkos::Experimental::ScatterSum,
|
||||
int duplication = -1,
|
||||
int contribution = -1,
|
||||
typename RT, typename ... RP>
|
||||
ScatterView
|
||||
< RT
|
||||
, typename ViewTraits<RT, RP...>::array_layout
|
||||
, typename ViewTraits<RT, RP...>::execution_space
|
||||
, Op
|
||||
/* just setting defaults if not specified... things got messy because the view type
|
||||
does not come before the duplication/contribution settings in the
|
||||
template parameter list */
|
||||
, duplication == -1 ? Kokkos::Impl::Experimental::DefaultDuplication<typename ViewTraits<RT, RP...>::execution_space>::value : duplication
|
||||
, contribution == -1 ?
|
||||
Kokkos::Impl::Experimental::DefaultContribution<
|
||||
typename ViewTraits<RT, RP...>::execution_space,
|
||||
(duplication == -1 ?
|
||||
Kokkos::Impl::Experimental::DefaultDuplication<
|
||||
typename ViewTraits<RT, RP...>::execution_space
|
||||
>::value
|
||||
: duplication
|
||||
)
|
||||
>::value
|
||||
: contribution
|
||||
>
|
||||
create_scatter_view(View<RT, RP...> const& original_view) {
|
||||
return original_view; // implicit ScatterView constructor call
|
||||
}
|
||||
|
||||
}} // namespace Kokkos::Experimental
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
// Free-function form of ScatterView::contribute_into: forwards to
// src.contribute_into(dest). All template parameters are deduced from the
// arguments.
// NOTE(review): the unit test instantiates ScatterView with the duplication
// setting fifth and the contribution setting sixth, so the names CT and DP
// here look swapped; this is harmless because they are only deduced and
// passed straight back to ScatterView.
template <typename DT1, typename DT2, typename LY, typename ES, int OP, int CT, int DP, typename ... VP>
|
||||
void
|
||||
contribute(View<DT1, VP...>& dest, Kokkos::Experimental::ScatterView<DT2, LY, ES, OP, CT, DP> const& src)
|
||||
{
|
||||
src.contribute_into(dest);
|
||||
}
|
||||
|
||||
}} // namespace Kokkos::Experimental
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
// Kokkos::realloc overload for ScatterView: forwards the extent arguments
// `is...` (taken by value) to scatter_view.realloc(...).
template <typename DT, typename LY, typename ES, int OP, int CT, int DP, typename ... IS>
|
||||
void
|
||||
realloc(Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view, IS ... is)
|
||||
{
|
||||
scatter_view.realloc(is ...);
|
||||
}
|
||||
|
||||
// Kokkos::resize overload for ScatterView: forwards the extent arguments
// `is...` (taken by value) to scatter_view.resize(...).
template <typename DT, typename LY, typename ES, int OP, int CT, int DP, typename ... IS>
|
||||
void
|
||||
resize(Kokkos::Experimental::ScatterView<DT, LY, ES, OP, CT, DP>& scatter_view, IS ... is)
|
||||
{
|
||||
scatter_view.resize(is ...);
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
@ -56,6 +56,7 @@
|
||||
template< class Scalar, class Arg1Type = void>
|
||||
class vector : public DualView<Scalar*,LayoutLeft,Arg1Type> {
|
||||
|
||||
public:
|
||||
typedef Scalar value_type;
|
||||
typedef Scalar* pointer;
|
||||
typedef const Scalar* const_pointer;
|
||||
|
||||
@ -3,7 +3,13 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
SET(LIBRARIES kokkoscore)
|
||||
IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
IF(KOKKOS_SEPARATE_LIBS)
|
||||
set(TEST_LINK_TARGETS kokkoscore)
|
||||
ELSE()
|
||||
set(TEST_LINK_TARGETS kokkos)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_Pthread)
|
||||
TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
@ -12,7 +18,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -23,7 +29,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -34,7 +40,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -45,7 +51,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
|
||||
@ -15,7 +15,8 @@ endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
LDFLAGS ?=
|
||||
override LDFLAGS += -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
|
||||
@ -62,6 +62,7 @@
|
||||
#include <TestVector.hpp>
|
||||
#include <TestDualView.hpp>
|
||||
#include <TestDynamicView.hpp>
|
||||
#include <TestScatterView.hpp>
|
||||
|
||||
#include <Kokkos_DynRankView.hpp>
|
||||
#include <TestDynViewAPI.hpp>
|
||||
@ -201,10 +202,18 @@ void cuda_test_bitset()
|
||||
cuda_test_dualview_combinations(size); \
|
||||
}
|
||||
|
||||
// Declares a gtest case named scatterview_<size>x on the `cuda` fixture that
// calls test_scatter_view<Kokkos::Cuda>(size).
#define CUDA_SCATTERVIEW_TEST( size ) \
|
||||
TEST_F( cuda, scatterview_##size##x) { \
|
||||
test_scatter_view<Kokkos::Cuda>(size); \
|
||||
}
|
||||
|
||||
CUDA_DUALVIEW_COMBINE_TEST( 10 )
|
||||
CUDA_VECTOR_COMBINE_TEST( 10 )
|
||||
CUDA_VECTOR_COMBINE_TEST( 3057 )
|
||||
|
||||
CUDA_SCATTERVIEW_TEST( 10 )
|
||||
|
||||
CUDA_SCATTERVIEW_TEST( 1000000 )
|
||||
|
||||
CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
|
||||
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)
|
||||
|
||||
@ -63,6 +63,8 @@
|
||||
#include <Kokkos_DynRankView.hpp>
|
||||
#include <TestDynViewAPI.hpp>
|
||||
|
||||
#include <TestScatterView.hpp>
|
||||
|
||||
#include <Kokkos_ErrorReporter.hpp>
|
||||
#include <TestErrorReporter.hpp>
|
||||
|
||||
@ -152,6 +154,11 @@ TEST_F( openmp , staticcrsgraph )
|
||||
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
|
||||
}
|
||||
|
||||
// Declares a gtest case named scatterview_<size>x on the `openmp` fixture
// that calls test_scatter_view<Kokkos::OpenMP>(size).
#define OPENMP_SCATTERVIEW_TEST( size ) \
|
||||
TEST_F( openmp, scatterview_##size##x) { \
|
||||
test_scatter_view<Kokkos::OpenMP>(size); \
|
||||
}
|
||||
|
||||
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
|
||||
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
||||
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
|
||||
@ -161,6 +168,10 @@ OPENMP_VECTOR_COMBINE_TEST( 10 )
|
||||
OPENMP_VECTOR_COMBINE_TEST( 3057 )
|
||||
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
|
||||
|
||||
OPENMP_SCATTERVIEW_TEST( 10 )
|
||||
|
||||
OPENMP_SCATTERVIEW_TEST( 1000000 )
|
||||
|
||||
#undef OPENMP_INSERT_TEST
|
||||
#undef OPENMP_FAILED_INSERT_TEST
|
||||
#undef OPENMP_ASSIGNEMENT_TEST
|
||||
|
||||
156
lib/kokkos/containers/unit_tests/TestScatterView.hpp
Normal file
156
lib/kokkos/containers/unit_tests/TestScatterView.hpp
Normal file
@ -0,0 +1,156 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
|
||||
#define KOKKOS_TEST_SCATTER_VIEW_HPP
|
||||
|
||||
#include <Kokkos_ScatterView.hpp>
|
||||
|
||||
namespace Test {
|
||||
|
||||
template <typename ExecSpace, typename Layout, int duplication, int contribution>
|
||||
void test_scatter_view_config(int n)
|
||||
{
|
||||
Kokkos::View<double *[3], Layout, ExecSpace> original_view("original_view", n);
|
||||
{
|
||||
auto scatter_view = Kokkos::Experimental::create_scatter_view
|
||||
< Kokkos::Experimental::ScatterSum
|
||||
, duplication
|
||||
, contribution
|
||||
> (original_view);
|
||||
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
|
||||
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
|
||||
auto f = KOKKOS_LAMBDA(int i) {
|
||||
auto scatter_access = scatter_view.access();
|
||||
auto scatter_access_atomic = scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
|
||||
for (int j = 0; j < 10; ++j) {
|
||||
auto k = (i + j) % n;
|
||||
scatter_access(k, 0) += 4.2;
|
||||
scatter_access_atomic(k, 1) += 2.0;
|
||||
scatter_access(k, 2) += 1.0;
|
||||
}
|
||||
};
|
||||
Kokkos::parallel_for(policy, f, "scatter_view_test");
|
||||
#endif
|
||||
Kokkos::Experimental::contribute(original_view, scatter_view);
|
||||
scatter_view.reset_except(original_view);
|
||||
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
|
||||
Kokkos::parallel_for(policy, f, "scatter_view_test");
|
||||
#endif
|
||||
Kokkos::Experimental::contribute(original_view, scatter_view);
|
||||
}
|
||||
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
|
||||
auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), original_view);
|
||||
for (typename decltype(host_view)::size_type i = 0; i < host_view.dimension_0(); ++i) {
|
||||
auto val0 = host_view(i, 0);
|
||||
auto val1 = host_view(i, 1);
|
||||
auto val2 = host_view(i, 2);
|
||||
EXPECT_TRUE(std::fabs((val0 - 84.0) / 84.0) < 1e-15);
|
||||
EXPECT_TRUE(std::fabs((val1 - 40.0) / 40.0) < 1e-15);
|
||||
EXPECT_TRUE(std::fabs((val2 - 20.0) / 20.0) < 1e-15);
|
||||
}
|
||||
#endif
|
||||
{
|
||||
Kokkos::Experimental::ScatterView
|
||||
< double*[3]
|
||||
, Layout
|
||||
, ExecSpace
|
||||
, Kokkos::Experimental::ScatterSum
|
||||
, duplication
|
||||
, contribution
|
||||
>
|
||||
persistent_view("persistent", n);
|
||||
auto result_view = persistent_view.subview();
|
||||
contribute(result_view, persistent_view);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ExecSpace>
|
||||
struct TestDuplicatedScatterView {
|
||||
TestDuplicatedScatterView(int n) {
|
||||
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
|
||||
Kokkos::Experimental::ScatterDuplicated,
|
||||
Kokkos::Experimental::ScatterNonAtomic>(n);
|
||||
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
|
||||
Kokkos::Experimental::ScatterDuplicated,
|
||||
Kokkos::Experimental::ScatterAtomic>(n);
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
// disable duplicated instantiation with CUDA until
|
||||
// UniqueToken can support it
|
||||
template <>
|
||||
struct TestDuplicatedScatterView<Kokkos::Cuda> {
|
||||
TestDuplicatedScatterView(int) {
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template <typename ExecSpace>
|
||||
void test_scatter_view(int n)
|
||||
{
|
||||
// all of these configurations should compile okay, but only some of them are
|
||||
// correct and/or sensible in terms of memory use
|
||||
Kokkos::Experimental::UniqueToken<ExecSpace> unique_token{ExecSpace()};
|
||||
|
||||
// no atomics or duplication is only sensible if the execution space
|
||||
// is running essentially in serial (doesn't have to be Serial though,
|
||||
// we also test OpenMP with one thread: LAMMPS cares about that)
|
||||
if (unique_token.size() == 1) {
|
||||
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
|
||||
Kokkos::Experimental::ScatterNonDuplicated,
|
||||
Kokkos::Experimental::ScatterNonAtomic>(n);
|
||||
}
|
||||
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
|
||||
Kokkos::Experimental::ScatterNonDuplicated,
|
||||
Kokkos::Experimental::ScatterAtomic>(n);
|
||||
|
||||
TestDuplicatedScatterView<ExecSpace> duptest(n);
|
||||
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#endif //KOKKOS_TEST_SCATTER_VIEW_HPP
|
||||
|
||||
|
||||
@ -58,6 +58,7 @@
|
||||
#include <TestVector.hpp>
|
||||
#include <TestDualView.hpp>
|
||||
#include <TestDynamicView.hpp>
|
||||
#include <TestScatterView.hpp>
|
||||
|
||||
#include <iomanip>
|
||||
|
||||
@ -148,6 +149,11 @@ TEST_F( serial, bitset )
|
||||
test_dualview_combinations<int,Kokkos::Serial>(size); \
|
||||
}
|
||||
|
||||
// Declares a gtest case named scatterview_<size>x on the `serial` fixture
// that calls test_scatter_view<Kokkos::Serial>(size).
#define SERIAL_SCATTERVIEW_TEST( size ) \
|
||||
TEST_F( serial, scatterview_##size##x) { \
|
||||
test_scatter_view<Kokkos::Serial>(size); \
|
||||
}
|
||||
|
||||
SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
|
||||
SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
||||
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
|
||||
@ -157,6 +163,10 @@ SERIAL_VECTOR_COMBINE_TEST( 10 )
|
||||
SERIAL_VECTOR_COMBINE_TEST( 3057 )
|
||||
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
|
||||
|
||||
SERIAL_SCATTERVIEW_TEST( 10 )
|
||||
|
||||
SERIAL_SCATTERVIEW_TEST( 1000000 )
|
||||
|
||||
#undef SERIAL_INSERT_TEST
|
||||
#undef SERIAL_FAILED_INSERT_TEST
|
||||
#undef SERIAL_ASSIGNEMENT_TEST
|
||||
|
||||
@ -2,7 +2,9 @@
|
||||
|
||||
TRIBITS_SUBPACKAGE(Core)
|
||||
|
||||
ADD_SUBDIRECTORY(src)
|
||||
IF(KOKKOS_HAS_TRILINOS)
|
||||
ADD_SUBDIRECTORY(src)
|
||||
ENDIF()
|
||||
|
||||
TRIBITS_ADD_TEST_DIRECTORIES(unit_test)
|
||||
TRIBITS_ADD_TEST_DIRECTORIES(perf_test)
|
||||
|
||||
@ -2,6 +2,14 @@
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
IF(KOKKOS_SEPARATE_LIBS)
|
||||
set(TEST_LINK_TARGETS kokkoscore)
|
||||
ELSE()
|
||||
set(TEST_LINK_TARGETS kokkos)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
# warning: PerfTest_CustomReduction.cpp uses
|
||||
# ../../algorithms/src/Kokkos_Random.hpp
|
||||
# we'll just allow it to be included, but note
|
||||
@ -23,7 +31,7 @@ TRIBITS_ADD_EXECUTABLE(
|
||||
PerfTestExec
|
||||
SOURCES ${SOURCES}
|
||||
COMM serial mpi
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
|
||||
TRIBITS_ADD_TEST(
|
||||
|
||||
@ -17,7 +17,8 @@ endif
|
||||
CXXFLAGS = -O3
|
||||
#CXXFLAGS += -DGENERIC_REDUCER
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
LDFLAGS ?=
|
||||
override LDFLAGS += -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
|
||||
@ -1,15 +1,4 @@
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Serial
|
||||
KOKKOS_HAVE_SERIAL
|
||||
"Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads."
|
||||
ON
|
||||
)
|
||||
|
||||
ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11)
|
||||
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA)
|
||||
|
||||
TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
@ -20,68 +9,90 @@ SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DI
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
SET(HEADERS_PUBLIC "")
|
||||
SET(HEADERS_PRIVATE "")
|
||||
SET(SOURCES "")
|
||||
IF(KOKKOS_LEGACY_TRIBITS)
|
||||
|
||||
FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp)
|
||||
LIST( APPEND HEADERS_PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h )
|
||||
ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11)
|
||||
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA)
|
||||
|
||||
SET(HEADERS_PUBLIC "")
|
||||
SET(HEADERS_PRIVATE "")
|
||||
SET(SOURCES "")
|
||||
|
||||
FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp)
|
||||
LIST( APPEND HEADERS_PUBLIC ${CMAKE_BINARY_DIR}/${PACKAGE_NAME}_config.h )
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS_IMPL impl/*.hpp)
|
||||
FILE(GLOB SOURCES_IMPL impl/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} )
|
||||
LIST(APPEND SOURCES ${SOURCES_IMPL} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS_THREADS Threads/*.hpp)
|
||||
FILE(GLOB SOURCES_THREADS Threads/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} )
|
||||
LIST(APPEND SOURCES ${SOURCES_THREADS} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp)
|
||||
FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} )
|
||||
LIST(APPEND SOURCES ${SOURCES_OPENMP} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS_CUDA Cuda/*.hpp)
|
||||
FILE(GLOB SOURCES_CUDA Cuda/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} )
|
||||
LIST(APPEND SOURCES ${SOURCES_CUDA} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp)
|
||||
FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} )
|
||||
LIST(APPEND SOURCES ${SOURCES_QTHREADS} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/)
|
||||
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
kokkoscore
|
||||
HEADERS ${HEADERS_PUBLIC}
|
||||
NOINSTALLHEADERS ${HEADERS_PRIVATE}
|
||||
SOURCES ${SOURCES}
|
||||
DEPLIBS
|
||||
)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# In the new build system, sources are calculated by Makefile.kokkos
|
||||
else()
|
||||
|
||||
FILE(GLOB HEADERS_IMPL impl/*.hpp)
|
||||
FILE(GLOB SOURCES_IMPL impl/*.cpp)
|
||||
INSTALL (DIRECTORY
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/"
|
||||
DESTINATION ${TRILINOS_INCDIR}
|
||||
FILES_MATCHING PATTERN "*.hpp"
|
||||
)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} )
|
||||
LIST(APPEND SOURCES ${SOURCES_IMPL} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
kokkoscore
|
||||
SOURCES ${KOKKOS_CORE_SRCS}
|
||||
DEPLIBS
|
||||
)
|
||||
|
||||
endif()
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS_THREADS Threads/*.hpp)
|
||||
FILE(GLOB SOURCES_THREADS Threads/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} )
|
||||
LIST(APPEND SOURCES ${SOURCES_THREADS} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp)
|
||||
FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} )
|
||||
LIST(APPEND SOURCES ${SOURCES_OPENMP} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
FILE(GLOB HEADERS_CUDA Cuda/*.hpp)
|
||||
FILE(GLOB SOURCES_CUDA Cuda/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} )
|
||||
LIST(APPEND SOURCES ${SOURCES_CUDA} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp)
|
||||
FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} )
|
||||
LIST(APPEND SOURCES ${SOURCES_QTHREADS} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
kokkoscore
|
||||
HEADERS ${HEADERS_PUBLIC}
|
||||
NOINSTALLHEADERS ${HEADERS_PRIVATE}
|
||||
SOURCES ${SOURCES}
|
||||
DEPLIBS
|
||||
)
|
||||
|
||||
@ -366,7 +366,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::
|
||||
if(Kokkos::Profiling::profileLibraryLoaded()) {
|
||||
|
||||
SharedAllocationHeader header ;
|
||||
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||
|
||||
Kokkos::Profiling::deallocateData(
|
||||
Kokkos::Profiling::SpaceHandle(Kokkos::CudaSpace::name()),header.m_label,
|
||||
@ -446,7 +446,7 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
|
||||
);
|
||||
|
||||
// Copy to device memory
|
||||
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
|
||||
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
|
||||
}
|
||||
|
||||
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
|
||||
@ -655,7 +655,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr
|
||||
Header const * const head_cuda = alloc_ptr ? Header::get_header( alloc_ptr ) : (Header*) 0 ;
|
||||
|
||||
if ( alloc_ptr ) {
|
||||
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );
|
||||
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>( & head , head_cuda , sizeof(SharedAllocationHeader) );
|
||||
}
|
||||
|
||||
RecordCuda * const record = alloc_ptr ? static_cast< RecordCuda * >( head.m_record ) : (RecordCuda *) 0 ;
|
||||
@ -724,7 +724,7 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & , bool detail )
|
||||
if ( detail ) {
|
||||
do {
|
||||
if ( r->m_alloc_ptr ) {
|
||||
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||
}
|
||||
else {
|
||||
head.m_label[0] = 0 ;
|
||||
@ -759,7 +759,7 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & , bool detail )
|
||||
do {
|
||||
if ( r->m_alloc_ptr ) {
|
||||
|
||||
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||
|
||||
//Formatting dependent on sizeof(uintptr_t)
|
||||
const char * format_string;
|
||||
|
||||
@ -648,10 +648,11 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
|
||||
|
||||
public:
|
||||
|
||||
@ -721,7 +722,7 @@ public:
|
||||
}
|
||||
|
||||
// Reduce with final value at blockDim.y - 1 location.
|
||||
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTag>(
|
||||
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTagFwd>(
|
||||
ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x ,
|
||||
kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) {
|
||||
|
||||
@ -731,7 +732,7 @@ public:
|
||||
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
|
||||
|
||||
if ( threadIdx.y == 0 ) {
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
|
||||
}
|
||||
|
||||
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
|
||||
@ -766,11 +767,11 @@ public:
|
||||
|
||||
value_type init;
|
||||
ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init);
|
||||
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTag>
|
||||
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTagFwd>
|
||||
(value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) {
|
||||
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
|
||||
if(id==0) {
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
|
||||
*result = value;
|
||||
}
|
||||
}
|
||||
@ -875,10 +876,11 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
|
||||
|
||||
public:
|
||||
|
||||
@ -942,7 +944,7 @@ public:
|
||||
|
||||
// Reduce with final value at blockDim.y - 1 location.
|
||||
// Problem: non power-of-two blockDim
|
||||
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTag>(
|
||||
if ( cuda_single_inter_block_reduce_scan<false,ReducerTypeFwd,WorkTagFwd>(
|
||||
ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x ,
|
||||
kokkos_impl_cuda_shared_memory<size_type>() , m_scratch_space , m_scratch_flags ) ) {
|
||||
|
||||
@ -951,7 +953,7 @@ public:
|
||||
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
|
||||
|
||||
if ( threadIdx.y == 0 ) {
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
|
||||
}
|
||||
|
||||
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
|
||||
@ -983,11 +985,11 @@ public:
|
||||
|
||||
value_type init;
|
||||
ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init);
|
||||
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTag>
|
||||
if(Impl::cuda_inter_block_reduction<ReducerTypeFwd,ValueJoin,WorkTagFwd>
|
||||
(value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) {
|
||||
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
|
||||
if(id==0) {
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
|
||||
*result = value;
|
||||
}
|
||||
}
|
||||
@ -1100,10 +1102,11 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
@ -1222,7 +1225,7 @@ public:
|
||||
size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
|
||||
|
||||
if ( threadIdx.y == 0 ) {
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
|
||||
}
|
||||
|
||||
if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
|
||||
@ -1260,7 +1263,7 @@ public:
|
||||
(value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y)) {
|
||||
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
|
||||
if(id==0) {
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
|
||||
*result = value;
|
||||
}
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@ void cuda_shfl( T & out , T const & in , int lane ,
|
||||
typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
|
||||
{
|
||||
*reinterpret_cast<int*>(&out) =
|
||||
__shfl( *reinterpret_cast<int const *>(&in) , lane , width );
|
||||
KOKKOS_IMPL_CUDA_SHFL( *reinterpret_cast<int const *>(&in) , lane , width );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
@ -83,7 +83,7 @@ void cuda_shfl( T & out , T const & in , int lane ,
|
||||
|
||||
for ( int i = 0 ; i < N ; ++i ) {
|
||||
reinterpret_cast<int*>(&out)[i] =
|
||||
__shfl( reinterpret_cast<int const *>(&in)[i] , lane , width );
|
||||
KOKKOS_IMPL_CUDA_SHFL( reinterpret_cast<int const *>(&in)[i] , lane , width );
|
||||
}
|
||||
}
|
||||
|
||||
@ -95,7 +95,7 @@ void cuda_shfl_down( T & out , T const & in , int delta ,
|
||||
typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
|
||||
{
|
||||
*reinterpret_cast<int*>(&out) =
|
||||
__shfl_down( *reinterpret_cast<int const *>(&in) , delta , width );
|
||||
KOKKOS_IMPL_CUDA_SHFL_DOWN( *reinterpret_cast<int const *>(&in) , delta , width );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
@ -109,7 +109,7 @@ void cuda_shfl_down( T & out , T const & in , int delta ,
|
||||
|
||||
for ( int i = 0 ; i < N ; ++i ) {
|
||||
reinterpret_cast<int*>(&out)[i] =
|
||||
__shfl_down( reinterpret_cast<int const *>(&in)[i] , delta , width );
|
||||
KOKKOS_IMPL_CUDA_SHFL_DOWN( reinterpret_cast<int const *>(&in)[i] , delta , width );
|
||||
}
|
||||
}
|
||||
|
||||
@ -121,7 +121,7 @@ void cuda_shfl_up( T & out , T const & in , int delta ,
|
||||
typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width )
|
||||
{
|
||||
*reinterpret_cast<int*>(&out) =
|
||||
__shfl_up( *reinterpret_cast<int const *>(&in) , delta , width );
|
||||
KOKKOS_IMPL_CUDA_SHFL_UP( *reinterpret_cast<int const *>(&in) , delta , width );
|
||||
}
|
||||
|
||||
template< typename T >
|
||||
@ -135,7 +135,7 @@ void cuda_shfl_up( T & out , T const & in , int delta ,
|
||||
|
||||
for ( int i = 0 ; i < N ; ++i ) {
|
||||
reinterpret_cast<int*>(&out)[i] =
|
||||
__shfl_up( reinterpret_cast<int const *>(&in)[i] , delta , width );
|
||||
KOKKOS_IMPL_CUDA_SHFL_UP( reinterpret_cast<int const *>(&in)[i] , delta , width );
|
||||
}
|
||||
}
|
||||
|
||||
@ -268,31 +268,31 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
|
||||
if( id + 1 < int(gridDim.x) )
|
||||
join(value, tmp);
|
||||
}
|
||||
int active = __ballot(1);
|
||||
int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 2) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 2,32);
|
||||
if( id + 2 < int(gridDim.x) )
|
||||
join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 4) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 4,32);
|
||||
if( id + 4 < int(gridDim.x) )
|
||||
join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 8) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 8,32);
|
||||
if( id + 8 < int(gridDim.x) )
|
||||
join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 16) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 16,32);
|
||||
if( id + 16 < int(gridDim.x) )
|
||||
join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
}
|
||||
}
|
||||
//The last block has in its thread=0 the global reduction value through "value"
|
||||
@ -432,31 +432,31 @@ cuda_inter_block_reduction( const ReducerType& reducer,
|
||||
if( id + 1 < int(gridDim.x) )
|
||||
reducer.join(value, tmp);
|
||||
}
|
||||
int active = __ballot(1);
|
||||
int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 2) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 2,32);
|
||||
if( id + 2 < int(gridDim.x) )
|
||||
reducer.join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 4) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 4,32);
|
||||
if( id + 4 < int(gridDim.x) )
|
||||
reducer.join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 8) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 8,32);
|
||||
if( id + 8 < int(gridDim.x) )
|
||||
reducer.join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
if (int(blockDim.x*blockDim.y) > 16) {
|
||||
value_type tmp = Kokkos::shfl_down(value, 16,32);
|
||||
if( id + 16 < int(gridDim.x) )
|
||||
reducer.join(value, tmp);
|
||||
}
|
||||
active += __ballot(1);
|
||||
active += KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -73,16 +73,16 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
UniqueToken() : m_buffer(0), m_count(0) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
KOKKOS_FUNCTION_DEFAULTED
|
||||
UniqueToken( const UniqueToken & ) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
KOKKOS_FUNCTION_DEFAULTED
|
||||
UniqueToken( UniqueToken && ) = default;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
KOKKOS_FUNCTION_DEFAULTED
|
||||
UniqueToken & operator=( const UniqueToken & ) = default ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
KOKKOS_FUNCTION_DEFAULTED
|
||||
UniqueToken & operator=( UniqueToken && ) = default ;
|
||||
|
||||
/// \brief upper bound for acquired values, i.e. 0 <= value < size()
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
namespace Kokkos {
|
||||
|
||||
|
||||
@ -91,12 +91,12 @@ namespace Impl {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int shfl(const int &val, const int& srcLane, const int& width ) {
|
||||
return __shfl(val,srcLane,width);
|
||||
return KOKKOS_IMPL_CUDA_SHFL(val,srcLane,width);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float shfl(const float &val, const int& srcLane, const int& width ) {
|
||||
return __shfl(val,srcLane,width);
|
||||
return KOKKOS_IMPL_CUDA_SHFL(val,srcLane,width);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
@ -105,7 +105,7 @@ namespace Impl {
|
||||
) {
|
||||
Scalar tmp1 = val;
|
||||
float tmp = *reinterpret_cast<float*>(&tmp1);
|
||||
tmp = __shfl(tmp,srcLane,width);
|
||||
tmp = KOKKOS_IMPL_CUDA_SHFL(tmp,srcLane,width);
|
||||
return *reinterpret_cast<Scalar*>(&tmp);
|
||||
}
|
||||
|
||||
@ -113,8 +113,8 @@ namespace Impl {
|
||||
double shfl(const double &val, const int& srcLane, const int& width) {
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
lo = __shfl(lo,srcLane,width);
|
||||
hi = __shfl(hi,srcLane,width);
|
||||
lo = KOKKOS_IMPL_CUDA_SHFL(lo,srcLane,width);
|
||||
hi = KOKKOS_IMPL_CUDA_SHFL(hi,srcLane,width);
|
||||
return __hiloint2double(hi,lo);
|
||||
}
|
||||
|
||||
@ -123,8 +123,8 @@ namespace Impl {
|
||||
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) {
|
||||
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
|
||||
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
|
||||
lo = __shfl(lo,srcLane,width);
|
||||
hi = __shfl(hi,srcLane,width);
|
||||
lo = KOKKOS_IMPL_CUDA_SHFL(lo,srcLane,width);
|
||||
hi = KOKKOS_IMPL_CUDA_SHFL(hi,srcLane,width);
|
||||
const double tmp = __hiloint2double(hi,lo);
|
||||
return *(reinterpret_cast<const Scalar*>(&tmp));
|
||||
}
|
||||
@ -137,18 +137,18 @@ namespace Impl {
|
||||
s_val = val;
|
||||
|
||||
for(int i = 0; i<s_val.n; i++)
|
||||
r_val.fval[i] = __shfl(s_val.fval[i],srcLane,width);
|
||||
r_val.fval[i] = KOKKOS_IMPL_CUDA_SHFL(s_val.fval[i],srcLane,width);
|
||||
return r_val.value();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int shfl_down(const int &val, const int& delta, const int& width) {
|
||||
return __shfl_down(val,delta,width);
|
||||
return KOKKOS_IMPL_CUDA_SHFL_DOWN(val,delta,width);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float shfl_down(const float &val, const int& delta, const int& width) {
|
||||
return __shfl_down(val,delta,width);
|
||||
return KOKKOS_IMPL_CUDA_SHFL_DOWN(val,delta,width);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
@ -156,7 +156,7 @@ namespace Impl {
|
||||
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
|
||||
Scalar tmp1 = val;
|
||||
float tmp = *reinterpret_cast<float*>(&tmp1);
|
||||
tmp = __shfl_down(tmp,delta,width);
|
||||
tmp = KOKKOS_IMPL_CUDA_SHFL_DOWN(tmp,delta,width);
|
||||
return *reinterpret_cast<Scalar*>(&tmp);
|
||||
}
|
||||
|
||||
@ -164,8 +164,8 @@ namespace Impl {
|
||||
double shfl_down(const double &val, const int& delta, const int& width) {
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
lo = __shfl_down(lo,delta,width);
|
||||
hi = __shfl_down(hi,delta,width);
|
||||
lo = KOKKOS_IMPL_CUDA_SHFL_DOWN(lo,delta,width);
|
||||
hi = KOKKOS_IMPL_CUDA_SHFL_DOWN(hi,delta,width);
|
||||
return __hiloint2double(hi,lo);
|
||||
}
|
||||
|
||||
@ -174,8 +174,8 @@ namespace Impl {
|
||||
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
|
||||
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
|
||||
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
|
||||
lo = __shfl_down(lo,delta,width);
|
||||
hi = __shfl_down(hi,delta,width);
|
||||
lo = KOKKOS_IMPL_CUDA_SHFL_DOWN(lo,delta,width);
|
||||
hi = KOKKOS_IMPL_CUDA_SHFL_DOWN(hi,delta,width);
|
||||
const double tmp = __hiloint2double(hi,lo);
|
||||
return *(reinterpret_cast<const Scalar*>(&tmp));
|
||||
}
|
||||
@ -188,18 +188,18 @@ namespace Impl {
|
||||
s_val = val;
|
||||
|
||||
for(int i = 0; i<s_val.n; i++)
|
||||
r_val.fval[i] = __shfl_down(s_val.fval[i],delta,width);
|
||||
r_val.fval[i] = KOKKOS_IMPL_CUDA_SHFL_DOWN(s_val.fval[i],delta,width);
|
||||
return r_val.value();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int shfl_up(const int &val, const int& delta, const int& width ) {
|
||||
return __shfl_up(val,delta,width);
|
||||
return KOKKOS_IMPL_CUDA_SHFL_UP(val,delta,width);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float shfl_up(const float &val, const int& delta, const int& width ) {
|
||||
return __shfl_up(val,delta,width);
|
||||
return KOKKOS_IMPL_CUDA_SHFL_UP(val,delta,width);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
@ -207,7 +207,7 @@ namespace Impl {
|
||||
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
|
||||
Scalar tmp1 = val;
|
||||
float tmp = *reinterpret_cast<float*>(&tmp1);
|
||||
tmp = __shfl_up(tmp,delta,width);
|
||||
tmp = KOKKOS_IMPL_CUDA_SHFL_UP(tmp,delta,width);
|
||||
return *reinterpret_cast<Scalar*>(&tmp);
|
||||
}
|
||||
|
||||
@ -215,8 +215,8 @@ namespace Impl {
|
||||
double shfl_up(const double &val, const int& delta, const int& width ) {
|
||||
int lo = __double2loint(val);
|
||||
int hi = __double2hiint(val);
|
||||
lo = __shfl_up(lo,delta,width);
|
||||
hi = __shfl_up(hi,delta,width);
|
||||
lo = KOKKOS_IMPL_CUDA_SHFL_UP(lo,delta,width);
|
||||
hi = KOKKOS_IMPL_CUDA_SHFL_UP(hi,delta,width);
|
||||
return __hiloint2double(hi,lo);
|
||||
}
|
||||
|
||||
@ -225,8 +225,8 @@ namespace Impl {
|
||||
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
|
||||
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
|
||||
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
|
||||
lo = __shfl_up(lo,delta,width);
|
||||
hi = __shfl_up(hi,delta,width);
|
||||
lo = KOKKOS_IMPL_CUDA_SHFL_UP(lo,delta,width);
|
||||
hi = KOKKOS_IMPL_CUDA_SHFL_UP(hi,delta,width);
|
||||
const double tmp = __hiloint2double(hi,lo);
|
||||
return *(reinterpret_cast<const Scalar*>(&tmp));
|
||||
}
|
||||
@ -239,7 +239,7 @@ namespace Impl {
|
||||
s_val = val;
|
||||
|
||||
for(int i = 0; i<s_val.n; i++)
|
||||
r_val.fval[i] = __shfl_up(s_val.fval[i],delta,width);
|
||||
r_val.fval[i] = KOKKOS_IMPL_CUDA_SHFL_UP(s_val.fval[i],delta,width);
|
||||
return r_val.value();
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,12 @@
|
||||
#include<Kokkos_Macros.hpp>
|
||||
// Map the KOKKOS_IMPL_CUDA_* warp primitives onto the intrinsics offered by
// the CUDA toolkit in use, so call sites do not need their own version checks.
// Note: if CUDA_VERSION is not defined when this header is preprocessed, it
// evaluates to 0 inside #if and the legacy (pre-9.0) branch is selected.
#if ( CUDA_VERSION < 9000 )
|
||||
// Pre-9.0 toolkits: intrinsics that take no participation mask.
#define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot(x)
|
||||
#define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl(x,y,z)
|
||||
#define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up(x,y,z)
|
||||
#define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down(x,y,z)
|
||||
#else
|
||||
// CUDA 9.0 and later: *_sync variants, always invoked with the mask 0xffffffff
// (every bit set). NOTE(review): this presumes all lanes of the warp reach the
// call together -- confirm at call sites that sit inside divergent branches.
#define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot_sync(0xffffffff,x)
|
||||
#define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl_sync(0xffffffff,x,y,z)
|
||||
#define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up_sync(0xffffffff,x,y,z)
|
||||
#define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down_sync(0xffffffff,x,y,z)
|
||||
#endif
|
||||
@ -251,7 +251,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined( __PGIC__ ) && !defined( __GNUC__ )
|
||||
#if defined( __PGIC__ )
|
||||
#define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__
|
||||
|
||||
#if ( 1540 > KOKKOS_COMPILER_PGI )
|
||||
@ -268,7 +268,9 @@
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
#if ( 1800 > KOKKOS_COMPILER_INTEL )
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
#if ( __INTEL_COMPILER > 1400 )
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
@ -511,5 +513,11 @@
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
#endif
|
||||
|
||||
|
||||
#if defined ( KOKKOS_ENABLE_CUDA )
|
||||
#if ( 9000 <= CUDA_VERSION )
|
||||
#define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND
|
||||
#endif
|
||||
#endif
|
||||
#endif // #ifndef KOKKOS_MACROS_HPP
|
||||
|
||||
|
||||
@ -51,6 +51,27 @@
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <impl/Kokkos_SharedAlloc.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
/* Report violation of size constraints:
|
||||
* min_block_alloc_size <= max_block_alloc_size
|
||||
* max_block_alloc_size <= min_superblock_size
|
||||
* min_superblock_size <= max_superblock_size
|
||||
* min_superblock_size <= min_total_alloc_size
|
||||
* min_superblock_size <= min_block_alloc_size *
|
||||
* max_block_per_superblock
|
||||
*/
|
||||
void memory_pool_bounds_verification
|
||||
( size_t min_block_alloc_size
|
||||
, size_t max_block_alloc_size
|
||||
, size_t min_superblock_size
|
||||
, size_t max_superblock_size
|
||||
, size_t max_block_per_superblock
|
||||
, size_t min_total_alloc_size
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< typename DeviceType >
|
||||
@ -332,39 +353,23 @@ public:
|
||||
|
||||
//--------------------------------------------------
|
||||
|
||||
{
|
||||
/* Enforce size constraints:
|
||||
* min_block_alloc_size <= max_block_alloc_size
|
||||
* max_block_alloc_size <= min_superblock_size
|
||||
* min_superblock_size <= max_superblock_size
|
||||
* min_superblock_size <= min_total_alloc_size
|
||||
* min_superblock_size <= min_block_alloc_size *
|
||||
* max_block_per_superblock
|
||||
*/
|
||||
/* Enforce size constraints:
|
||||
* min_block_alloc_size <= max_block_alloc_size
|
||||
* max_block_alloc_size <= min_superblock_size
|
||||
* min_superblock_size <= max_superblock_size
|
||||
* min_superblock_size <= min_total_alloc_size
|
||||
* min_superblock_size <= min_block_alloc_size *
|
||||
* max_block_per_superblock
|
||||
*/
|
||||
|
||||
const size_t max_superblock =
|
||||
min_block_alloc_size * max_block_per_superblock ;
|
||||
|
||||
if ( ( size_t(max_superblock_size) < min_superblock_size ) ||
|
||||
( min_total_alloc_size < min_superblock_size ) ||
|
||||
( max_superblock < min_superblock_size ) ||
|
||||
( min_superblock_size < max_block_alloc_size ) ||
|
||||
( max_block_alloc_size < min_block_alloc_size ) ) {
|
||||
|
||||
#if 1
|
||||
printf( " MemoryPool min_block_alloc_size(%ld) max_block_alloc_size(%ld) min_superblock_size(%ld) min_total_alloc_size(%ld) ; max_superblock_size(%ld) max_block_per_superblock(%ld)\n"
|
||||
, min_block_alloc_size
|
||||
Kokkos::Impl::memory_pool_bounds_verification
|
||||
( min_block_alloc_size
|
||||
, max_block_alloc_size
|
||||
, min_superblock_size
|
||||
, max_superblock_size
|
||||
, max_block_per_superblock
|
||||
, min_total_alloc_size
|
||||
, size_t(max_superblock_size)
|
||||
, size_t(max_block_per_superblock)
|
||||
);
|
||||
#endif
|
||||
|
||||
Kokkos::abort("Kokkos MemoryPool size constraint violation");
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------
|
||||
// Block and superblock size is power of two:
|
||||
|
||||
@ -204,6 +204,7 @@ struct reduction_identity<double> {
|
||||
KOKKOS_FORCEINLINE_FUNCTION constexpr static double min() {return DBL_MAX;}
|
||||
};
|
||||
|
||||
#if !defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
|
||||
template<>
|
||||
struct reduction_identity<long double> {
|
||||
KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum() {return static_cast<long double>(0.0);}
|
||||
@ -211,6 +212,7 @@ struct reduction_identity<long double> {
|
||||
KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max() {return -LDBL_MAX;}
|
||||
KOKKOS_FORCEINLINE_FUNCTION constexpr static long double min() {return LDBL_MAX;}
|
||||
};
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -78,7 +78,7 @@ struct pair
|
||||
/// This calls the default constructors of T1 and T2. It won't
|
||||
/// compile if those default constructors are not defined and
|
||||
/// public.
|
||||
KOKKOS_FORCEINLINE_FUNCTION constexpr
|
||||
KOKKOS_FUNCTION_DEFAULTED constexpr
|
||||
pair() = default ;
|
||||
|
||||
/// \brief Constructor that takes both elements of the pair.
|
||||
@ -458,7 +458,7 @@ struct pair<T1,void>
|
||||
first_type first;
|
||||
enum { second = 0 };
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION constexpr
|
||||
KOKKOS_FUNCTION_DEFAULTED constexpr
|
||||
pair() = default ;
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION constexpr
|
||||
|
||||
@ -241,7 +241,7 @@ void parallel_for( const std::string & str
|
||||
std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
|
||||
#endif
|
||||
|
||||
parallel_for(policy,functor,str);
|
||||
::Kokkos::parallel_for(policy,functor,str);
|
||||
|
||||
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
|
||||
Kokkos::fence();
|
||||
@ -487,7 +487,7 @@ void parallel_scan( const std::string& str
|
||||
std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
|
||||
#endif
|
||||
|
||||
parallel_scan(policy,functor,str);
|
||||
::Kokkos::parallel_scan(policy,functor,str);
|
||||
|
||||
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
|
||||
Kokkos::fence();
|
||||
|
||||
111
lib/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp
Normal file
111
lib/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOSP_PROFILE_SECTION_HPP
|
||||
#define KOKKOSP_PROFILE_SECTION_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Profiling {
|
||||
|
||||
/// \brief Handle for a named profiling section.
///
/// The constructor registers the section with the profiling library (when
/// profiling is compiled in and a library is loaded), start()/stop() bracket
/// the region of interest, and the destructor unregisters the section.
/// When profiling is compiled out, or no library is loaded, every operation
/// is a no-op and the section ID stays 0.
///
/// NOTE(review): the implicitly generated copy constructor duplicates secID,
/// so each copy's destructor would call destroyProfileSection() on the same
/// ID -- avoid copying instances.
class ProfilingSection {

public:
  /// \brief Create the section and register it under \p sectionName.
  /// \param sectionName  Name stored in the object and passed to the
  ///                     profiling library.
  ProfilingSection(const std::string& sectionName) :
    secName(sectionName),
    // Always start from a defined value: previously secID was left
    // uninitialized when profiling was enabled but no library was loaded,
    // so getSectionID() returned an indeterminate value.
    secID(0) {

    #if defined( KOKKOS_ENABLE_PROFILING )
      if(Kokkos::Profiling::profileLibraryLoaded()) {
        // Presumably the library stores the assigned ID through the pointer.
        Kokkos::Profiling::createProfileSection(secName, &secID);
      }
    #endif
  }

  /// \brief Mark the beginning of the section (no-op without a loaded library).
  void start() {
    #if defined( KOKKOS_ENABLE_PROFILING )
      if(Kokkos::Profiling::profileLibraryLoaded()) {
        Kokkos::Profiling::startSection(secID);
      }
    #endif
  }

  /// \brief Mark the end of the section (no-op without a loaded library).
  void stop() {
    #if defined( KOKKOS_ENABLE_PROFILING )
      if(Kokkos::Profiling::profileLibraryLoaded()) {
        Kokkos::Profiling::stopSection(secID);
      }
    #endif
  }

  /// \brief Unregister the section from the profiling library, if one is loaded.
  ~ProfilingSection() {
    #if defined( KOKKOS_ENABLE_PROFILING )
      if(Kokkos::Profiling::profileLibraryLoaded()) {
        Kokkos::Profiling::destroyProfileSection(secID);
      }
    #endif
  }

  /// \return A copy of the name given at construction.
  std::string getName() const {
    return secName;
  }

  /// \return The section ID; 0 unless the profiling library assigned one.
  uint32_t getSectionID() const {
    return secID;
  }

protected:
  const std::string secName;  // name supplied at construction
  uint32_t secID;             // ID used in the start/stop/destroy calls

};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -145,7 +145,7 @@ public:
|
||||
unsigned use_cores_per_numa = 0 ,
|
||||
bool allow_asynchronous_threadpool = false);
|
||||
|
||||
static int is_initialized();
|
||||
static bool is_initialized();
|
||||
|
||||
/** \brief Return the maximum amount of concurrency. */
|
||||
static int concurrency() {return 1;};
|
||||
@ -424,11 +424,13 @@ private:
|
||||
typedef typename Policy::work_tag WorkTag ;
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename Analysis::pointer_type pointer_type ;
|
||||
typedef typename Analysis::reference_type reference_type ;
|
||||
@ -488,7 +490,7 @@ public:
|
||||
|
||||
this-> template exec< WorkTag >( update );
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::
|
||||
final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
}
|
||||
|
||||
@ -675,12 +677,13 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef typename ReducerTypeFwd::value_type ValueType;
|
||||
|
||||
typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename Analysis::pointer_type pointer_type ;
|
||||
typedef typename Analysis::reference_type reference_type ;
|
||||
@ -735,7 +738,7 @@ public:
|
||||
|
||||
this-> exec( update );
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::
|
||||
final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
}
|
||||
|
||||
@ -878,8 +881,9 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename Analysis::pointer_type pointer_type ;
|
||||
typedef typename Analysis::reference_type reference_type ;
|
||||
@ -940,7 +944,7 @@ public:
|
||||
|
||||
this-> template exec< WorkTag >( data , update );
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::
|
||||
final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
}
|
||||
|
||||
|
||||
@ -5,51 +5,44 @@ endif
|
||||
|
||||
PREFIX ?= /usr/local/lib/kokkos
|
||||
|
||||
default: messages build-lib
|
||||
echo "End Build"
|
||||
default: build-lib
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
|
||||
CXX ?= $(KOKKOS_PATH)/bin/nvcc_wrapper
|
||||
else
|
||||
CXX = g++
|
||||
CXX ?= g++
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
CXXFLAGS ?= -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?=
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
PWD = $(shell pwd)
|
||||
|
||||
KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
|
||||
include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists
|
||||
include $(KOKKOS_PATH)/core/src/Makefile.generate_build_files
|
||||
|
||||
CONDITIONAL_COPIES =
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
CONDITIONAL_COPIES += copy-cuda
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||
CONDITIONAL_COPIES += copy-threads
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
|
||||
CONDITIONAL_COPIES += copy-qthreads
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||
CONDITIONAL_COPIES += copy-openmp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
CONDITIONAL_COPIES += copy-rocm
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_OS),CYGWIN)
|
||||
COPY_FLAG = -u
|
||||
endif
|
||||
@ -66,104 +59,7 @@ else
|
||||
KOKKOS_DEBUG_CMAKE = ON
|
||||
endif
|
||||
|
||||
messages:
|
||||
echo "Start Build"
|
||||
|
||||
build-makefile-kokkos:
|
||||
rm -f Makefile.kokkos
|
||||
echo "#Global Settings used to generate this library" >> Makefile.kokkos
|
||||
echo "KOKKOS_PATH = $(PREFIX)" >> Makefile.kokkos
|
||||
echo "KOKKOS_DEVICES = $(KOKKOS_DEVICES)" >> Makefile.kokkos
|
||||
echo "KOKKOS_ARCH = $(KOKKOS_ARCH)" >> Makefile.kokkos
|
||||
echo "KOKKOS_DEBUG = $(KOKKOS_DEBUG)" >> Makefile.kokkos
|
||||
echo "KOKKOS_USE_TPLS = $(KOKKOS_USE_TPLS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos
|
||||
echo "KOKKOS_OPTIONS = $(KOKKOS_OPTIONS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos
|
||||
echo "CXX ?= $(CXX)" >> Makefile.kokkos
|
||||
echo "NVCC_WRAPPER ?= $(PREFIX)/bin/nvcc_wrapper" >> Makefile.kokkos
|
||||
echo "" >> Makefile.kokkos
|
||||
echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos
|
||||
echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos
|
||||
echo "" >> Makefile.kokkos
|
||||
echo "#Variables used in application Makefiles" >> Makefile.kokkos
|
||||
echo "KOKKOS_OS = $(KOKKOS_OS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos
|
||||
echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos
|
||||
echo "" >> Makefile.kokkos
|
||||
echo "#Internal settings which need to propagated for Kokkos examples" >> Makefile.kokkos
|
||||
echo "KOKKOS_INTERNAL_USE_CUDA = ${KOKKOS_INTERNAL_USE_CUDA}" >> Makefile.kokkos
|
||||
echo "KOKKOS_INTERNAL_USE_QTHREADS = ${KOKKOS_INTERNAL_USE_QTHREADS}" >> Makefile.kokkos
|
||||
echo "KOKKOS_INTERNAL_USE_OPENMP = ${KOKKOS_INTERNAL_USE_OPENMP}" >> Makefile.kokkos
|
||||
echo "KOKKOS_INTERNAL_USE_PTHREADS = ${KOKKOS_INTERNAL_USE_PTHREADS}" >> Makefile.kokkos
|
||||
echo "" >> Makefile.kokkos
|
||||
echo "#Fake kokkos-clean target" >> Makefile.kokkos
|
||||
echo "kokkos-clean:" >> Makefile.kokkos
|
||||
echo "" >> Makefile.kokkos
|
||||
sed \
|
||||
-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
|
||||
-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
|
||||
-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
|
||||
-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
|
||||
-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
|
||||
-e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' Makefile.kokkos \
|
||||
> Makefile.kokkos.tmp
|
||||
mv -f Makefile.kokkos.tmp Makefile.kokkos
|
||||
|
||||
build-cmake-kokkos:
|
||||
rm -f kokkos.cmake
|
||||
echo "#Global Settings used to generate this library" >> kokkos.cmake
|
||||
echo "set(KOKKOS_PATH $(PREFIX) CACHE PATH \"Kokkos installation path\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_DEVICES $(KOKKOS_DEVICES) CACHE STRING \"Kokkos devices list\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_ARCH $(KOKKOS_ARCH) CACHE STRING \"Kokkos architecture flags\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_DEBUG $(KOKKOS_DEBUG_CMAKE) CACHE BOOL \"Kokkos debug enabled ?)\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_USE_TPLS $(KOKKOS_USE_TPLS) CACHE STRING \"Kokkos templates list\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_CXX_STANDARD $(KOKKOS_CXX_STANDARD) CACHE STRING \"Kokkos C++ standard\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_OPTIONS $(KOKKOS_OPTIONS) CACHE STRING \"Kokkos options\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_CUDA_OPTIONS $(KOKKOS_CUDA_OPTIONS) CACHE STRING \"Kokkos Cuda options\")" >> kokkos.cmake
|
||||
echo "if(NOT $ENV{CXX})" >> kokkos.cmake
|
||||
echo ' message(WARNING "You are currently using compiler $${CMAKE_CXX_COMPILER} while Kokkos was built with $(CXX) ; make sure this is the behavior you intended to be.")' >> kokkos.cmake
|
||||
echo "endif()" >> kokkos.cmake
|
||||
echo "if(NOT DEFINED ENV{NVCC_WRAPPER})" >> kokkos.cmake
|
||||
echo " set(NVCC_WRAPPER \"$(NVCC_WRAPPER)\" CACHE FILEPATH \"Path to command nvcc_wrapper\")" >> kokkos.cmake
|
||||
echo "else()" >> kokkos.cmake
|
||||
echo ' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")' >> kokkos.cmake
|
||||
echo "endif()" >> kokkos.cmake
|
||||
echo "" >> kokkos.cmake
|
||||
echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> kokkos.cmake
|
||||
echo "set(KOKKOS_HEADERS \"$(KOKKOS_HEADERS)\" CACHE STRING \"Kokkos headers list\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_SRC \"$(KOKKOS_SRC)\" CACHE STRING \"Kokkos source list\")" >> kokkos.cmake
|
||||
echo "" >> kokkos.cmake
|
||||
echo "#Variables used in application Makefiles" >> kokkos.cmake
|
||||
echo "set(KOKKOS_CPP_DEPENDS \"$(KOKKOS_CPP_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_CXXFLAGS \"$(KOKKOS_CXXFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_CPPFLAGS \"$(KOKKOS_CPPFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_LINK_DEPENDS \"$(KOKKOS_LINK_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_LIBS \"$(KOKKOS_LIBS)\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_LDFLAGS \"$(KOKKOS_LDFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "" >> kokkos.cmake
|
||||
echo "#Internal settings which need to propagated for Kokkos examples" >> kokkos.cmake
|
||||
echo "set(KOKKOS_INTERNAL_USE_CUDA \"${KOKKOS_INTERNAL_USE_CUDA}\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_INTERNAL_USE_OPENMP \"${KOKKOS_INTERNAL_USE_OPENMP}\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "set(KOKKOS_INTERNAL_USE_PTHREADS \"${KOKKOS_INTERNAL_USE_PTHREADS}\" CACHE STRING \"\")" >> kokkos.cmake
|
||||
echo "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)" >> kokkos.cmake
|
||||
echo "" >> kokkos.cmake
|
||||
sed \
|
||||
-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
|
||||
-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
|
||||
-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
|
||||
-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
|
||||
-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
|
||||
-e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' kokkos.cmake \
|
||||
> kokkos.cmake.tmp
|
||||
mv -f kokkos.cmake.tmp kokkos.cmake
|
||||
|
||||
build-lib: build-makefile-kokkos build-cmake-kokkos $(KOKKOS_LINK_DEPENDS)
|
||||
build-lib: $(KOKKOS_LINK_DEPENDS)
|
||||
|
||||
mkdir:
|
||||
mkdir -p $(PREFIX)
|
||||
@ -188,14 +84,18 @@ copy-openmp: mkdir
|
||||
mkdir -p $(PREFIX)/include/OpenMP
|
||||
cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
|
||||
|
||||
install: mkdir $(CONDITIONAL_COPIES) build-lib
|
||||
copy-rocm: mkdir
|
||||
mkdir -p $(PREFIX)/include/ROCm
|
||||
cp $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm
|
||||
|
||||
install: mkdir $(CONDITIONAL_COPIES) build-lib generate_build_settings
|
||||
cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin
|
||||
cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
|
||||
cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
|
||||
cp $(COPY_FLAG) Makefile.kokkos $(PREFIX)
|
||||
cp $(COPY_FLAG) kokkos.cmake $(PREFIX)
|
||||
cp $(COPY_FLAG) $(KOKKOS_MAKEFILE) $(PREFIX)
|
||||
cp $(COPY_FLAG) $(KOKKOS_CMAKEFILE) $(PREFIX)
|
||||
cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib
|
||||
cp $(COPY_FLAG) KokkosCore_config.h $(PREFIX)/include
|
||||
cp $(COPY_FLAG) $(KOKKOS_CONFIG_HEADER) $(PREFIX)/include
|
||||
|
||||
clean: kokkos-clean
|
||||
rm -f Makefile.kokkos
|
||||
rm -f $(KOKKOS_MAKEFILE) $(KOKKOS_CMAKEFILE)
|
||||
|
||||
100
lib/kokkos/core/src/Makefile.generate_build_files
Normal file
100
lib/kokkos/core/src/Makefile.generate_build_files
Normal file
@ -0,0 +1,100 @@
|
||||
# This file is responsible for generating files which will be used
|
||||
# by build system (make and cmake) in scenarios where the kokkos library
|
||||
# gets installed before building the app
|
||||
|
||||
# These files are generated by this makefile
|
||||
KOKKOS_MAKEFILE=Makefile.kokkos
|
||||
KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake
|
||||
|
||||
# Accept both the quoted form ("no") and a bare no, e.g. from `make KOKKOS_DEBUG=no`; any other value maps to ON below.
ifeq ($(strip $(subst ",,$(KOKKOS_DEBUG))),no)
|
||||
KOKKOS_DEBUG_CMAKE = OFF
|
||||
else
|
||||
KOKKOS_DEBUG_CMAKE = ON
|
||||
endif
|
||||
|
||||
# Functions for generating makefile and cmake file
|
||||
# In calling these routines, do not put space after ,
|
||||
# e.g., $(call kokkos_append_var,KOKKOS_PATH,$(PREFIX))
|
||||
kokkos_append_makefile = echo $1 >> $(KOKKOS_MAKEFILE)
|
||||
kokkos_append_cmakefile = echo $1 >> $(KOKKOS_CMAKEFILE)
|
||||
|
||||
kokkos_setvar_cmakefile = echo set\($1 $2\) >> $(KOKKOS_CMAKEFILE)
|
||||
kokkos_setlist_cmakefile = echo set\($1 \"$2\"\) >> $(KOKKOS_CMAKEFILE)
|
||||
|
||||
kokkos_appendvar_makefile = echo $1 = $($(1)) >> $(KOKKOS_MAKEFILE)
|
||||
kokkos_appendvar2_makefile = echo $1 ?= $($(1)) >> $(KOKKOS_MAKEFILE)
|
||||
kokkos_appendvar_cmakefile = echo set\($1 $($(1)) CACHE $2 FORCE\) >> $(KOKKOS_CMAKEFILE)
|
||||
kokkos_appendval_makefile = echo $1 = $2 >> $(KOKKOS_MAKEFILE)
|
||||
kokkos_appendval_cmakefile = echo set\($1 $2 CACHE $3 FORCE\) >> $(KOKKOS_CMAKEFILE)
|
||||
|
||||
kokkos_append_string = $(call kokkos_append_makefile,$1); $(call kokkos_append_cmakefile,$1)
|
||||
kokkos_append_var = $(call kokkos_appendvar_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2)
|
||||
kokkos_append_var2 = $(call kokkos_appendvar2_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2)
|
||||
kokkos_append_varval = $(call kokkos_appendval_makefile,$1,$2); $(call kokkos_appendval_cmakefile,$1,$2,$3)
|
||||
|
||||
generate_build_settings: $(KOKKOS_CONFIG_HEADER)
|
||||
@rm -f $(KOKKOS_MAKEFILE)
|
||||
@rm -f $(KOKKOS_CMAKEFILE)
|
||||
@$(call kokkos_append_string, "#Global Settings used to generate this library")
|
||||
@$(call kokkos_append_varval,KOKKOS_PATH,$(KOKKOS_INSTALL_PATH),'FILEPATH "Kokkos installation path"')
|
||||
@$(call kokkos_append_var,KOKKOS_DEVICES,'STRING "Kokkos devices list"')
|
||||
@$(call kokkos_append_var,KOKKOS_ARCH,'STRING "Kokkos architecture flags"')
|
||||
@$(call kokkos_appendvar_makefile,KOKKOS_DEBUG)
|
||||
@$(call kokkos_appendvar_cmakefile,KOKKOS_DEBUG_CMAKE,'BOOL "Kokkos debug enabled ?"')
|
||||
@$(call kokkos_append_var,KOKKOS_USE_TPLS,'STRING "Kokkos third-party libraries list"') # TPLS = third-party libraries, not templates
|
||||
@$(call kokkos_append_var,KOKKOS_CXX_STANDARD,'STRING "Kokkos C++ standard"')
|
||||
@$(call kokkos_append_var,KOKKOS_OPTIONS,'STRING "Kokkos options"')
|
||||
@$(call kokkos_append_var,KOKKOS_CUDA_OPTIONS,'STRING "Kokkos Cuda options"')
|
||||
@$(call kokkos_appendvar2_makefile,CXX) # Writes an overridable "CXX ?= <compiler>" default into the generated makefile only
|
||||
@$(call kokkos_append_cmakefile,"if(NOT DEFINED ENV{NVCC_WRAPPER})")
|
||||
@$(call kokkos_append_var2,NVCC_WRAPPER,'FILEPATH "Path to command nvcc_wrapper"')
|
||||
@$(call kokkos_append_cmakefile,"else()")
|
||||
@$(call kokkos_append_cmakefile,' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")')
|
||||
@$(call kokkos_append_cmakefile,"endif()")
|
||||
@$(call kokkos_append_string,"")
|
||||
@$(call kokkos_append_string,"#Source and Header files of Kokkos relative to KOKKOS_PATH")
|
||||
@$(call kokkos_append_var,KOKKOS_HEADERS,'STRING "Kokkos headers list"')
|
||||
@$(call kokkos_append_var,KOKKOS_HEADERS_IMPL,'STRING "Kokkos headers impl list"')
|
||||
@$(call kokkos_append_var,KOKKOS_HEADERS_CUDA,'STRING "Kokkos headers Cuda list"')
|
||||
@$(call kokkos_append_var,KOKKOS_HEADERS_OPENMP,'STRING "Kokkos headers OpenMP list"')
|
||||
@$(call kokkos_append_var,KOKKOS_HEADERS_ROCM,'STRING "Kokkos headers ROCm list"')
|
||||
@$(call kokkos_append_var,KOKKOS_HEADERS_THREADS,'STRING "Kokkos headers Threads list"')
|
||||
@$(call kokkos_append_var,KOKKOS_HEADERS_QTHREADS,'STRING "Kokkos headers QThreads list"')
|
||||
@$(call kokkos_append_var,KOKKOS_SRC,'STRING "Kokkos source list"')
|
||||
@$(call kokkos_append_string,"")
|
||||
@$(call kokkos_append_string,"#Variables used in application Makefiles")
|
||||
@$(call kokkos_append_var,KOKKOS_OS,'STRING ""') # This was not in original cmake gen
|
||||
@$(call kokkos_append_var,KOKKOS_CPP_DEPENDS,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_LINK_DEPENDS,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_CXXFLAGS,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_CPPFLAGS,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_LDFLAGS,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_LIBS,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_EXTRA_LIBS,'STRING ""')
|
||||
@$(call kokkos_append_string,"")
|
||||
@$(call kokkos_append_string,"#Internal settings which need to propagated for Kokkos examples")
|
||||
@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_CUDA,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_OPENMP,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_PTHREADS,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_ROCM,'STRING ""')
|
||||
@$(call kokkos_append_var,KOKKOS_INTERNAL_USE_QTHREADS,'STRING ""') # Not in original cmake gen
|
||||
@$(call kokkos_append_cmakefile,"mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)") # Comma after the function name is required, otherwise nothing is emitted
|
||||
@$(call kokkos_append_makefile,"")
|
||||
@$(call kokkos_append_makefile,"#Fake kokkos-clean target")
|
||||
@$(call kokkos_append_makefile,"kokkos-clean:")
|
||||
@$(call kokkos_append_makefile,"")
|
||||
@sed \
|
||||
-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
|
||||
-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
|
||||
-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
|
||||
-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
|
||||
-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
|
||||
-e 's|= $(KOKKOS_CONFIG_HEADER)|= $(PREFIX)/include/$(KOKKOS_CONFIG_HEADER)|g' $(KOKKOS_MAKEFILE) \
|
||||
> $(KOKKOS_MAKEFILE).tmp
|
||||
@mv -f $(KOKKOS_MAKEFILE).tmp $(KOKKOS_MAKEFILE)
|
||||
@$(call kokkos_setvar_cmakefile,KOKKOS_CXX_FLAGS,$(KOKKOS_CXXFLAGS))
|
||||
@$(call kokkos_setvar_cmakefile,KOKKOS_CPP_FLAGS,$(KOKKOS_CPPFLAGS))
|
||||
@$(call kokkos_setvar_cmakefile,KOKKOS_LD_FLAGS,$(KOKKOS_LDFLAGS))
|
||||
@$(call kokkos_setlist_cmakefile,KOKKOS_LIBS_LIST,$(KOKKOS_LIBS))
|
||||
@$(call kokkos_setlist_cmakefile,KOKKOS_EXTRA_LIBS_LIST,$(KOKKOS_EXTRA_LIBS))
|
||||
|
||||
28
lib/kokkos/core/src/Makefile.generate_header_lists
Normal file
28
lib/kokkos/core/src/Makefile.generate_header_lists
Normal file
@ -0,0 +1,28 @@
|
||||
# Build a List of Header Files
|
||||
|
||||
KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
|
||||
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
KOKKOS_HEADERS_ROCM += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
|
||||
endif
|
||||
|
||||
@ -292,11 +292,12 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
// Static Assert WorkTag void if ReducerType not InvalidType
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
|
||||
|
||||
typedef typename Analysis::pointer_type pointer_type ;
|
||||
typedef typename Analysis::reference_type reference_type ;
|
||||
@ -393,7 +394,7 @@ public:
|
||||
, m_instance->get_thread_data(i)->pool_reduce_local() );
|
||||
}
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
|
||||
if ( m_result_ptr ) {
|
||||
const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) );
|
||||
@ -463,11 +464,12 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef typename ReducerTypeFwd::value_type ValueType;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ;
|
||||
|
||||
typedef typename Analysis::pointer_type pointer_type ;
|
||||
typedef typename Analysis::reference_type reference_type ;
|
||||
@ -558,7 +560,7 @@ public:
|
||||
, m_instance->get_thread_data(i)->pool_reduce_local() );
|
||||
}
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
|
||||
if ( m_result_ptr ) {
|
||||
const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) );
|
||||
@ -920,9 +922,10 @@ private:
|
||||
, FunctorType, ReducerType> ReducerConditional;
|
||||
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ;
|
||||
|
||||
typedef typename Analysis::pointer_type pointer_type ;
|
||||
typedef typename Analysis::reference_type reference_type ;
|
||||
@ -1067,7 +1070,7 @@ public:
|
||||
, m_instance->get_thread_data(i)->pool_reduce_local() );
|
||||
}
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr );
|
||||
|
||||
if ( m_result_ptr ) {
|
||||
const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) );
|
||||
|
||||
@ -248,12 +248,13 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
// Static Assert WorkTag void if ReducerType not InvalidType
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ;
|
||||
|
||||
enum {HasJoin = ReduceFunctorHasJoin<FunctorType>::value };
|
||||
enum {UseReducer = is_reducer_type<ReducerType>::value };
|
||||
@ -620,10 +621,11 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
|
||||
@ -150,11 +150,12 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType > ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, WorkTag, void >::type WorkTagFwd;
|
||||
|
||||
// Static Assert WorkTag void if ReducerType not InvalidType
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
@ -213,7 +214,7 @@ public:
|
||||
|
||||
const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result();
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , data );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , data );
|
||||
|
||||
if ( m_result_ptr ) {
|
||||
const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );
|
||||
@ -331,9 +332,10 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, WorkTag, void >::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
@ -394,7 +396,7 @@ public:
|
||||
|
||||
const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result();
|
||||
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer), data );
|
||||
Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer), data );
|
||||
|
||||
if ( m_result_ptr ) {
|
||||
const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) );
|
||||
|
||||
@ -102,11 +102,12 @@ void reduce_enqueue(
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, F, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, Tag, void >::type TagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , Tag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , Tag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , Tag > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorFinal< ReducerTypeFwd , Tag > ValueFinal ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , TagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , TagFwd > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , TagFwd > ValueJoin ;
|
||||
typedef Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagFwd > ValueFinal ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
|
||||
@ -50,7 +50,6 @@
|
||||
#include <cstdio>
|
||||
|
||||
#include <utility>
|
||||
#include <cstdalign>
|
||||
#include <impl/Kokkos_Spinwait.hpp>
|
||||
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||
|
||||
|
||||
@ -396,9 +396,10 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
@ -458,7 +459,7 @@ private:
|
||||
( self.m_functor , range.begin() , range.end()
|
||||
, ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) );
|
||||
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
}
|
||||
|
||||
template<class Schedule>
|
||||
@ -484,7 +485,7 @@ private:
|
||||
work_index = exec.get_work_index();
|
||||
}
|
||||
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
}
|
||||
|
||||
public:
|
||||
@ -564,11 +565,12 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef typename ReducerTypeFwd::value_type ValueType;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
@ -618,7 +620,7 @@ private:
|
||||
( self.m_mdr_policy, self.m_functor , range.begin() , range.end()
|
||||
, ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) );
|
||||
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
}
|
||||
|
||||
template<class Schedule>
|
||||
@ -644,7 +646,7 @@ private:
|
||||
work_index = exec.get_work_index();
|
||||
}
|
||||
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
}
|
||||
|
||||
public:
|
||||
@ -725,9 +727,10 @@ private:
|
||||
|
||||
typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional;
|
||||
typedef typename ReducerConditional::type ReducerTypeFwd;
|
||||
typedef typename Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, WorkTag, void>::type WorkTagFwd;
|
||||
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ;
|
||||
typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ;
|
||||
typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ;
|
||||
|
||||
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||
typedef typename ValueTraits::reference_type reference_type ;
|
||||
@ -767,7 +770,7 @@ private:
|
||||
( self.m_functor , Member( & exec , self.m_policy , self.m_shared )
|
||||
, ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) );
|
||||
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) );
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
@ -49,6 +49,10 @@
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP )
|
||||
#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -103,7 +107,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
@ -115,7 +119,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare ,
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
|
||||
@ -49,6 +49,10 @@
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP )
|
||||
#define KOKKOS_ATOMIC_EXCHANGE_HPP
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -126,7 +130,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
#endif
|
||||
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
@ -137,7 +141,7 @@ T atomic_exchange( volatile T * const dest ,
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
|
||||
@ -49,6 +49,10 @@
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_ADD_HPP
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -139,7 +143,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
@ -151,7 +155,7 @@ T atomic_fetch_add( volatile T * const dest ,
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
|
||||
@ -49,6 +49,10 @@
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_SUB_HPP )
|
||||
#define KOKKOS_ATOMIC_FETCH_SUB_HPP
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -117,7 +121,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
@ -128,7 +132,7 @@ T atomic_fetch_sub( volatile T * const dest ,
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
|
||||
}
|
||||
return return_val;
|
||||
}
|
||||
|
||||
@ -46,6 +46,10 @@
|
||||
#define KOKKOS_ATOMIC_GENERIC_HPP
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#include<Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp>
|
||||
#endif
|
||||
|
||||
// Combination operands to be used in an Compare and Exchange based atomic operation
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
@ -242,7 +246,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
T return_val;
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
@ -253,7 +257,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest ,
|
||||
done=1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
|
||||
}
|
||||
return return_val;
|
||||
#endif
|
||||
@ -281,7 +285,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
T return_val;
|
||||
// This is a way to (hopefully) avoid dead lock in a warp
|
||||
int done = 0;
|
||||
unsigned int active = __ballot(1);
|
||||
unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1);
|
||||
unsigned int done_active = 0;
|
||||
while (active!=done_active) {
|
||||
if(!done) {
|
||||
@ -292,7 +296,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest ,
|
||||
done=1;
|
||||
}
|
||||
}
|
||||
done_active = __ballot(done);
|
||||
done_active = KOKKOS_IMPL_CUDA_BALLOT(done);
|
||||
}
|
||||
return return_val;
|
||||
#endif
|
||||
|
||||
@ -48,6 +48,10 @@
|
||||
#include <cstdint>
|
||||
#include <climits>
|
||||
|
||||
#if defined( __HCC_ACCELERATOR__ )
|
||||
#include <hc.hpp>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
|
||||
@ -132,10 +132,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
|
||||
// struct, you may remove this line of code.
|
||||
(void) args;
|
||||
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) {
|
||||
Kokkos::Serial::initialize();
|
||||
}
|
||||
// Always initialize Serial if it is configure time enabled
|
||||
Kokkos::Serial::initialize();
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_OPENMPTARGET )
|
||||
@ -234,12 +232,8 @@ void finalize_internal( const bool all_spaces = false )
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value ||
|
||||
std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ||
|
||||
all_spaces ) {
|
||||
if(Kokkos::Serial::is_initialized())
|
||||
Kokkos::Serial::finalize();
|
||||
}
|
||||
if(Kokkos::Serial::is_initialized())
|
||||
Kokkos::Serial::finalize();
|
||||
#endif
|
||||
|
||||
g_is_initialized = false;
|
||||
@ -383,6 +377,7 @@ void initialize(int& narg, char* arg[])
|
||||
}
|
||||
if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found)
|
||||
ndevices = atoi(num1_only);
|
||||
delete [] num1_only;
|
||||
|
||||
if( num2 != NULL ) {
|
||||
if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) )
|
||||
@ -439,7 +434,7 @@ void initialize(int& narg, char* arg[])
|
||||
std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl;
|
||||
std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl;
|
||||
std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl;
|
||||
std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl;
|
||||
std::cout << "non prefixed ones, and the last occurrence of an argument overwrites prior" << std::endl;
|
||||
std::cout << "settings." << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "--kokkos-help : print this message" << std::endl;
|
||||
|
||||
204
lib/kokkos/core/src/impl/Kokkos_HostBarrier.cpp
Normal file
204
lib/kokkos/core/src/impl/Kokkos_HostBarrier.cpp
Normal file
@ -0,0 +1,204 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
|
||||
#include <impl/Kokkos_HostBarrier.hpp>
|
||||
#include <impl/Kokkos_Spinwait.hpp>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
namespace {
|
||||
|
||||
// HostBarrier::HEADER expressed as a count of uint64_t words; used below as
// the word offset added to the buffer start to reach the per-rank status words.
enum : int { HEADER_SIZE = HostBarrier::HEADER / sizeof(uint64_t) };
|
||||
|
||||
// Number of uint64_t words needed to hold nthreads-1 bytes, rounded up to a
// whole word.  Kept as a single return statement so that it remains a valid
// C++11 constexpr function; the arithmetic is carried out in the unsigned
// type of sizeof and converted back to int on return.
inline constexpr int length64( const int nthreads ) noexcept
{
  return static_cast<int>( ( nthreads + sizeof(uint64_t) - 2 ) / sizeof(uint64_t) );
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void rendezvous_initialize( volatile void * buffer
|
||||
, const int size
|
||||
, const int rank
|
||||
) noexcept
|
||||
{
|
||||
Kokkos::store_fence();
|
||||
|
||||
// ensure that the buffer has been zero'd out
|
||||
constexpr uint8_t zero8 = static_cast<uint8_t>(0);
|
||||
constexpr uint64_t zero64 = static_cast<uint64_t>(0);
|
||||
|
||||
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
|
||||
|
||||
if (rank > 0) {
|
||||
volatile uint8_t * bytes = reinterpret_cast<volatile uint8_t *>(buffer) + RENDEZVOUS_HEADER;
|
||||
|
||||
bytes[rank-1] = zero8;
|
||||
|
||||
// last thread is responsible for zeroing out the final bytes of the last uint64_t
|
||||
if (rank == size-1) {
|
||||
const int tmp = (size-1) % sizeof(uint64_t);
|
||||
const int rem = tmp ? sizeof(uint64_t) - tmp : 0;
|
||||
for (int i=0; i<rem; ++i) {
|
||||
bytes[rank+i] = zero8;
|
||||
}
|
||||
}
|
||||
|
||||
spinwait_until_equal( *header, zero64 );
|
||||
}
|
||||
else {
|
||||
|
||||
const int n = length64(size);
|
||||
volatile uint64_t * buff = reinterpret_cast<volatile uint64_t *>(buffer) + HEADER_SIZE;
|
||||
|
||||
// wait for other threads to finish initializing
|
||||
for (int i=0; i<n; ++i) {
|
||||
spinwait_until_equal( buff[i], zero64 );
|
||||
}
|
||||
|
||||
// release the waiting threads
|
||||
*header = zero64;
|
||||
Kokkos::store_fence();
|
||||
}
|
||||
Kokkos::load_fence();
|
||||
}
|
||||
|
||||
bool rendezvous( volatile void * buffer
|
||||
, uint64_t & step
|
||||
, const int size
|
||||
, const int rank
|
||||
, bool active_wait
|
||||
) noexcept
|
||||
{
|
||||
// Force all outstanding stores from this thread to retire before continuing
|
||||
Kokkos::store_fence();
|
||||
|
||||
// guarantees that will never spinwait on a spin_value of 0
|
||||
step = static_cast<uint8_t>(step + 1u)
|
||||
? step + 1u
|
||||
: step + 2u
|
||||
;
|
||||
|
||||
// if size == 1, it is incorrect for rank 0 to check the tail value of the buffer
|
||||
// this optimization prevents a potential read of uninitialized memory
|
||||
if ( size == 1 ) { return true; }
|
||||
|
||||
const uint8_t byte_value = static_cast<uint8_t>(step);
|
||||
|
||||
// byte that is set in the spin_value rotates every time
|
||||
// this prevents threads from overtaking the master thread
|
||||
const uint64_t spin_value = static_cast<uint64_t>(byte_value) << (byte_value&7);
|
||||
|
||||
if ( rank > 0 ) {
|
||||
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
|
||||
volatile uint8_t * bytes = reinterpret_cast<volatile uint8_t *>(buffer) + RENDEZVOUS_HEADER;
|
||||
|
||||
bytes[ rank-1 ] = byte_value;
|
||||
|
||||
if ( active_wait ) {
|
||||
spinwait_until_equal( *header, spin_value );
|
||||
}
|
||||
else {
|
||||
yield_until_equal( *header, spin_value );
|
||||
}
|
||||
}
|
||||
else { // rank 0
|
||||
volatile uint64_t * buff = reinterpret_cast<volatile uint64_t *>(buffer) + HEADER_SIZE;
|
||||
const int n = length64(size);
|
||||
|
||||
uint64_t comp = byte_value;
|
||||
comp = comp | (comp << 8);
|
||||
comp = comp | (comp << 16);
|
||||
comp = comp | (comp << 32);
|
||||
|
||||
const int rem = (size-1) % sizeof(uint64_t);
|
||||
|
||||
union {
|
||||
volatile uint64_t value;
|
||||
volatile uint8_t array[sizeof(uint64_t)];
|
||||
} tmp{};
|
||||
|
||||
for (int i=0; i<rem; ++i) {
|
||||
tmp.array[i] = byte_value;
|
||||
}
|
||||
|
||||
const uint64_t tail = rem ? tmp.value : comp;
|
||||
|
||||
for (int i=0; i<n-1; ++i) {
|
||||
spinwait_until_equal( buff[i], comp );
|
||||
}
|
||||
spinwait_until_equal( buff[n-1], tail );
|
||||
|
||||
}
|
||||
|
||||
// Force all outstanding stores from other threads to retire before allowing
|
||||
// this thread to continue. This forces correctness on systems with out-of-order
|
||||
// memory (Power and ARM)
|
||||
Kokkos::load_fence();
|
||||
|
||||
return rank == 0;
|
||||
}
|
||||
|
||||
void rendezvous_release( volatile void * buffer
|
||||
, const uint64_t step
|
||||
) noexcept
|
||||
{
|
||||
const uint8_t byte_value = static_cast<uint8_t>(step);
|
||||
const uint64_t spin_value = static_cast<uint64_t>(byte_value) << (byte_value&7);
|
||||
volatile uint64_t * header = reinterpret_cast<volatile uint64_t *>(buffer);
|
||||
|
||||
// Force all outstanding stores from this thread to retire before releasing
|
||||
// the other threads. This forces correctness on systems with out-of-order
|
||||
// memory (Power and ARM)
|
||||
Kokkos::store_fence();
|
||||
|
||||
*header = spin_value;
|
||||
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
146
lib/kokkos/core/src/impl/Kokkos_HostBarrier.hpp
Normal file
146
lib/kokkos/core/src/impl/Kokkos_HostBarrier.hpp
Normal file
@ -0,0 +1,146 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_HOST_BARRIER_HPP
|
||||
#define KOKKOS_HOST_BARRIER_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Layout constants of a rendezvous buffer, in bytes: the header occupies one
// full alignment unit.
enum : int
  { RENDEZVOUS_ALIGNMENT = 128
  , RENDEZVOUS_HEADER    = RENDEZVOUS_ALIGNMENT
  };

// Bytes required for a rendezvous buffer shared by nthreads threads: the
// header plus nthreads-1 bytes rounded up to a multiple of
// RENDEZVOUS_ALIGNMENT.  Single return statement (C++11 constexpr).
inline constexpr int rendezvous_buffer_size( const int nthreads ) noexcept
{
  return RENDEZVOUS_HEADER
       + RENDEZVOUS_ALIGNMENT
         * ( ( nthreads + RENDEZVOUS_ALIGNMENT - 2 ) / RENDEZVOUS_ALIGNMENT );
}
|
||||
|
||||
void rendezvous_initialize( volatile void * buffer
|
||||
, const int size
|
||||
, const int rank
|
||||
) noexcept;
|
||||
|
||||
|
||||
bool rendezvous( volatile void * buffer
|
||||
, uint64_t & step
|
||||
, const int size
|
||||
, const int rank
|
||||
, bool active_wait = true
|
||||
) noexcept;
|
||||
|
||||
void rendezvous_release( volatile void * buffer
|
||||
, const uint64_t step
|
||||
) noexcept;
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HostBarrier
|
||||
{
|
||||
public:
|
||||
|
||||
enum : int { ALIGNMENT = RENDEZVOUS_ALIGNMENT };
|
||||
enum : int { HEADER = ALIGNMENT};
|
||||
|
||||
enum Policy : int { ACTIVE, PASSIVE };
|
||||
|
||||
inline static constexpr int buffer_size( const int nthreads ) noexcept
|
||||
{
|
||||
return rendezvous_buffer_size(nthreads);
|
||||
}
|
||||
|
||||
HostBarrier( volatile void * arg_buffer
|
||||
, int arg_size
|
||||
, int arg_rank
|
||||
, Policy arg_policy
|
||||
) noexcept
|
||||
: m_buffer{arg_buffer}
|
||||
, m_size{arg_size}
|
||||
, m_rank{arg_rank}
|
||||
, m_policy{arg_policy}
|
||||
, m_step{0}
|
||||
{
|
||||
rendezvous_initialize( m_buffer, m_size, m_rank );
|
||||
}
|
||||
|
||||
bool rendezvous() const noexcept
|
||||
{
|
||||
return Kokkos::Impl::rendezvous( m_buffer
|
||||
, m_step
|
||||
, m_size
|
||||
, m_rank
|
||||
, m_policy == ACTIVE
|
||||
);
|
||||
}
|
||||
|
||||
void rendezvous_release() const noexcept
|
||||
{
|
||||
Kokkos::Impl::rendezvous_release( m_buffer, m_step );
|
||||
}
|
||||
|
||||
private:
|
||||
volatile void * m_buffer ;
|
||||
const int m_size ;
|
||||
const int m_rank ;
|
||||
const Policy m_policy ;
|
||||
mutable uint64_t m_step ;
|
||||
|
||||
private:
|
||||
HostBarrier( const HostBarrier & ) = delete;
|
||||
HostBarrier( HostBarrier && ) = delete;
|
||||
HostBarrier & operator=( const HostBarrier & ) = delete;
|
||||
HostBarrier & operator=( HostBarrier && ) = delete;
|
||||
};
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_HOST_BARRIER_HPP
|
||||
|
||||
@ -206,158 +206,6 @@ void HostThreadTeamData::disband_team()
|
||||
m_team_rendezvous_step = 0 ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* pattern for rendezvous
|
||||
*
|
||||
* if ( rendezvous() ) {
|
||||
* ... all other threads are still in team_rendezvous() ...
|
||||
* rendezvous_release();
|
||||
* ... all other threads are released from team_rendezvous() ...
|
||||
* }
|
||||
*/
|
||||
|
||||
int HostThreadTeamData::rendezvous( int64_t * const buffer
|
||||
, int & rendezvous_step
|
||||
, int const size
|
||||
, int const rank ) noexcept
|
||||
{
|
||||
enum : int { shift_byte = 3 };
|
||||
enum : int { size_byte = ( 01 << shift_byte ) }; // == 8
|
||||
enum : int { mask_byte = size_byte - 1 };
|
||||
|
||||
enum : int { shift_mem_cycle = 2 };
|
||||
enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4
|
||||
enum : int { mask_mem_cycle = size_mem_cycle - 1 };
|
||||
|
||||
// Cycle step values: 1 <= step <= size_val_cycle
|
||||
// An odd multiple of memory cycle so that when a memory location
|
||||
// is reused it has a different value.
|
||||
// Must be representable within a single byte: size_val_cycle < 16
|
||||
|
||||
enum : int { size_val_cycle = 3 * size_mem_cycle };
|
||||
|
||||
// Requires:
|
||||
// Called by rank = [ 0 .. size )
|
||||
// buffer aligned to int64_t[4]
|
||||
|
||||
// A sequence of rendezvous uses four cycled locations in memory
|
||||
// and non-equal cycled synchronization values to
|
||||
// 1) prevent rendezvous from overtaking one another and
|
||||
// 2) give each spin wait location an int64_t[4] span
|
||||
// so that it has its own cache line.
|
||||
|
||||
const int step = ( rendezvous_step % size_val_cycle ) + 1 ;
|
||||
|
||||
rendezvous_step = step ;
|
||||
|
||||
// The leading int64_t[4] span is for thread 0 to write
|
||||
// and all other threads to read spin-wait.
|
||||
// sync_offset is the index into this array for this step.
|
||||
|
||||
const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle ;
|
||||
|
||||
if ( rank ) {
|
||||
|
||||
const int group_begin = rank << shift_byte ; // == rank * size_byte
|
||||
|
||||
if ( group_begin < size ) {
|
||||
|
||||
// This thread waits for threads
|
||||
// [ group_begin .. group_begin + 8 )
|
||||
// [ rank*8 .. rank*8 + 8 )
|
||||
// to write to their designated bytes.
|
||||
|
||||
const int end = group_begin + size_byte < size
|
||||
? size_byte : size - group_begin ;
|
||||
|
||||
int64_t value = 0 ;
|
||||
|
||||
for ( int i = 0 ; i < end ; ++i ) {
|
||||
((int8_t*) & value )[i] = int8_t( step );
|
||||
}
|
||||
// Do not REMOVE this store fence!!!
|
||||
// Makes stuff hang on GCC with more than 8 threads
|
||||
store_fence();
|
||||
spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
|
||||
, value );
|
||||
}
|
||||
|
||||
{
|
||||
// This thread sets its designated byte.
|
||||
// ( rank % size_byte ) +
|
||||
// ( ( rank / size_byte ) * size_byte * size_mem_cycle ) +
|
||||
// ( sync_offset * size_byte )
|
||||
int offset = ( rank & mask_byte )
|
||||
+ ( ( rank & ~mask_byte ) << shift_mem_cycle )
|
||||
+ ( sync_offset << shift_byte );
|
||||
|
||||
// Switch designated byte if running on big endian machine
|
||||
volatile uint16_t value = 1;
|
||||
volatile uint8_t* byte = (uint8_t*) &value;
|
||||
volatile bool is_big_endian = (!(byte[0] == 1));
|
||||
if (is_big_endian) {
|
||||
int remainder = ((offset) % 8);
|
||||
int base = offset - remainder;
|
||||
int shift = 7 - remainder;
|
||||
offset = base + shift;
|
||||
}
|
||||
|
||||
// All of this thread's previous memory stores must be complete before
|
||||
// this thread stores the step value at this thread's designated byte
|
||||
// in the shared synchronization array.
|
||||
|
||||
Kokkos::memory_fence();
|
||||
|
||||
((volatile int8_t*) buffer)[ offset ] = int8_t( step );
|
||||
|
||||
// Memory fence to push the previous store out
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
// Wait for thread 0 to release all other threads
|
||||
|
||||
spinwait_until_equal( buffer[ step & mask_mem_cycle ] , int64_t(step) );
|
||||
|
||||
}
|
||||
else {
|
||||
// Thread 0 waits for threads [1..7]
|
||||
// to write to their designated bytes.
|
||||
|
||||
const int end = size_byte < size ? 8 : size ;
|
||||
|
||||
int64_t value = 0 ;
|
||||
for ( int i = 1 ; i < end ; ++i ) {
|
||||
((int8_t *) & value)[i] = int8_t( step );
|
||||
}
|
||||
|
||||
spinwait_until_equal( buffer[ sync_offset ], value );
|
||||
}
|
||||
|
||||
return rank ? 0 : 1 ;
|
||||
}
|
||||
|
||||
void HostThreadTeamData::
|
||||
rendezvous_release( int64_t * const buffer
|
||||
, int const rendezvous_step ) noexcept
|
||||
{
|
||||
enum : int { shift_mem_cycle = 2 };
|
||||
enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4
|
||||
enum : int { mask_mem_cycle = size_mem_cycle - 1 };
|
||||
|
||||
// Requires:
|
||||
// Called after team_rendezvous
|
||||
// Called only by true == team_rendezvous(root)
|
||||
|
||||
// Memory fence to be sure all previous writes are complete:
|
||||
Kokkos::memory_fence();
|
||||
|
||||
((volatile int64_t*) buffer)[ rendezvous_step & mask_mem_cycle ] =
|
||||
int64_t( rendezvous_step );
|
||||
|
||||
// Memory fence to push the store out
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
int HostThreadTeamData::get_work_stealing() noexcept
|
||||
|
||||
@ -50,7 +50,7 @@
|
||||
#include <Kokkos_ExecPolicy.hpp>
|
||||
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||
#include <impl/Kokkos_FunctorAnalysis.hpp>
|
||||
#include <impl/Kokkos_Rendezvous.hpp>
|
||||
#include <impl/Kokkos_HostBarrier.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -113,50 +113,29 @@ private:
|
||||
int m_league_size ;
|
||||
int m_work_chunk ;
|
||||
int m_steal_rank ; // work stealing rank
|
||||
int mutable m_team_rendezvous_step ;
|
||||
uint64_t mutable m_pool_rendezvous_step ;
|
||||
uint64_t mutable m_team_rendezvous_step ;
|
||||
|
||||
// Look up team member r: entry m_team_base + r of the pointer table that is
// read from the pool scratch storage at offset m_pool_members.
HostThreadTeamData * team_member( int r ) const noexcept
  {
    HostThreadTeamData ** const members =
      (HostThreadTeamData**)( m_pool_scratch + m_pool_members );
    return members[ m_team_base + r ];
  }
|
||||
|
||||
// Rendezvous pattern:
|
||||
// if ( rendezvous(root) ) {
|
||||
// ... only root thread here while all others wait ...
|
||||
// rendezvous_release();
|
||||
// }
|
||||
// else {
|
||||
// ... all other threads release here ...
|
||||
// }
|
||||
//
|
||||
// Requires: buffer[ ( max_threads / 8 ) * 4 + 4 ]; 0 == max_threads % 8
|
||||
//
|
||||
static
|
||||
int rendezvous( int64_t * const buffer
|
||||
, int & rendezvous_step
|
||||
, int const size
|
||||
, int const rank ) noexcept ;
|
||||
|
||||
static
|
||||
void rendezvous_release( int64_t * const buffer
|
||||
, int const rendezvous_step ) noexcept ;
|
||||
|
||||
public:
|
||||
|
||||
inline
|
||||
int team_rendezvous( int const root ) const noexcept
|
||||
{
|
||||
return 1 == m_team_size ? 1 :
|
||||
HostThreadTeamData::
|
||||
rendezvous( m_team_scratch + m_team_rendezvous
|
||||
, m_team_rendezvous_step
|
||||
, m_team_size
|
||||
, ( m_team_rank + m_team_size - root ) % m_team_size );
|
||||
, ( m_team_rank + m_team_size - root ) % m_team_size
|
||||
);
|
||||
}
|
||||
|
||||
inline
|
||||
int team_rendezvous() const noexcept
|
||||
{
|
||||
return 1 == m_team_size ? 1 :
|
||||
HostThreadTeamData::
|
||||
rendezvous( m_team_scratch + m_team_rendezvous
|
||||
, m_team_rendezvous_step
|
||||
, m_team_size
|
||||
@ -167,7 +146,6 @@ public:
|
||||
void team_rendezvous_release() const noexcept
|
||||
{
|
||||
if ( 1 < m_team_size ) {
|
||||
HostThreadTeamData::
|
||||
rendezvous_release( m_team_scratch + m_team_rendezvous
|
||||
, m_team_rendezvous_step );
|
||||
}
|
||||
@ -176,30 +154,30 @@ public:
|
||||
inline
|
||||
int pool_rendezvous() const noexcept
|
||||
{
|
||||
static constexpr int yield_wait =
|
||||
static constexpr bool active_wait =
|
||||
#if defined( KOKKOS_COMPILER_IBM )
|
||||
// If running on IBM POWER architecture the global
|
||||
// level rendezvous should immediately yield when
|
||||
// waiting for other threads in the pool to arrive.
|
||||
1
|
||||
false
|
||||
#else
|
||||
0
|
||||
true
|
||||
#endif
|
||||
;
|
||||
return 1 == m_pool_size ? 1 :
|
||||
Kokkos::Impl::
|
||||
rendezvous( m_pool_scratch + m_pool_rendezvous
|
||||
, m_pool_rendezvous_step
|
||||
, m_pool_size
|
||||
, m_pool_rank
|
||||
, yield_wait );
|
||||
, active_wait
|
||||
);
|
||||
}
|
||||
|
||||
inline
|
||||
void pool_rendezvous_release() const noexcept
|
||||
{
|
||||
if ( 1 < m_pool_size ) {
|
||||
Kokkos::Impl::
|
||||
rendezvous_release( m_pool_scratch + m_pool_rendezvous );
|
||||
rendezvous_release( m_pool_scratch + m_pool_rendezvous, m_pool_rendezvous_step );
|
||||
}
|
||||
}
|
||||
|
||||
@ -225,6 +203,7 @@ public:
|
||||
, m_league_size(1)
|
||||
, m_work_chunk(0)
|
||||
, m_steal_rank(0)
|
||||
, m_pool_rendezvous_step(0)
|
||||
, m_team_rendezvous_step(0)
|
||||
{}
|
||||
|
||||
|
||||
125
lib/kokkos/core/src/impl/Kokkos_MemoryPool.cpp
Normal file
125
lib/kokkos/core/src/impl/Kokkos_MemoryPool.cpp
Normal file
@ -0,0 +1,125 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/* Verify size constraints:
|
||||
* min_block_alloc_size <= max_block_alloc_size
|
||||
* max_block_alloc_size <= min_superblock_size
|
||||
* min_superblock_size <= max_superblock_size
|
||||
* min_superblock_size <= min_total_alloc_size
|
||||
* min_superblock_size <= min_block_alloc_size *
|
||||
* max_block_per_superblock
|
||||
*/
|
||||
void memory_pool_bounds_verification
|
||||
( size_t min_block_alloc_size
|
||||
, size_t max_block_alloc_size
|
||||
, size_t min_superblock_size
|
||||
, size_t max_superblock_size
|
||||
, size_t max_block_per_superblock
|
||||
, size_t min_total_alloc_size
|
||||
)
|
||||
{
|
||||
const size_t max_superblock =
|
||||
min_block_alloc_size * max_block_per_superblock ;
|
||||
|
||||
if ( ( size_t(max_superblock_size) < min_superblock_size ) ||
|
||||
( min_total_alloc_size < min_superblock_size ) ||
|
||||
( max_superblock < min_superblock_size ) ||
|
||||
( min_superblock_size < max_block_alloc_size ) ||
|
||||
( max_block_alloc_size < min_block_alloc_size ) ) {
|
||||
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "Kokkos::MemoryPool size constraint violation" ;
|
||||
|
||||
if ( size_t(max_superblock_size) < min_superblock_size ) {
|
||||
msg << " : max_superblock_size("
|
||||
<< max_superblock_size
|
||||
<< ") < min_superblock_size("
|
||||
<< min_superblock_size << ")" ;
|
||||
}
|
||||
|
||||
if ( min_total_alloc_size < min_superblock_size ) {
|
||||
msg << " : min_total_alloc_size("
|
||||
<< min_total_alloc_size
|
||||
<< ") < min_superblock_size("
|
||||
<< min_superblock_size << ")" ;
|
||||
}
|
||||
|
||||
if ( max_superblock < min_superblock_size ) {
|
||||
msg << " : max_superblock("
|
||||
<< max_superblock
|
||||
<< ") < min_superblock_size("
|
||||
<< min_superblock_size << ")" ;
|
||||
}
|
||||
|
||||
if ( min_superblock_size < max_block_alloc_size ) {
|
||||
msg << " : min_superblock_size("
|
||||
<< min_superblock_size
|
||||
<< ") < max_block_alloc_size("
|
||||
<< max_block_alloc_size << ")" ;
|
||||
}
|
||||
|
||||
if ( max_block_alloc_size < min_block_alloc_size ) {
|
||||
msg << " : max_block_alloc_size("
|
||||
<< max_block_alloc_size
|
||||
<< ") < min_block_alloc_size("
|
||||
<< min_block_alloc_size << ")" ;
|
||||
}
|
||||
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,7 +45,9 @@
|
||||
#if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP )
|
||||
#define KOKKOS_MEMORY_FENCE_HPP
|
||||
|
||||
#if !defined(_OPENMP)
|
||||
#include <atomic>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -54,8 +56,10 @@ namespace Kokkos {
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void memory_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( _OPENMP )
|
||||
#pragma omp flush
|
||||
#else
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
#endif
|
||||
@ -71,6 +75,8 @@ void store_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( _OPENMP )
|
||||
#pragma omp flush
|
||||
#else
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
#endif
|
||||
@ -86,6 +92,8 @@ void load_fence()
|
||||
{
|
||||
#if defined( __CUDA_ARCH__ )
|
||||
__threadfence();
|
||||
#elif defined( _OPENMP )
|
||||
#pragma omp flush
|
||||
#else
|
||||
std::atomic_thread_fence( std::memory_order_seq_cst );
|
||||
#endif
|
||||
|
||||
@ -69,6 +69,13 @@ static deallocateDataFunction deallocateDataCallee = nullptr;
|
||||
static beginDeepCopyFunction beginDeepCopyCallee = nullptr;
|
||||
static endDeepCopyFunction endDeepCopyCallee = nullptr;
|
||||
|
||||
static createProfileSectionFunction createSectionCallee = nullptr;
|
||||
static startProfileSectionFunction startSectionCallee = nullptr;
|
||||
static stopProfileSectionFunction stopSectionCallee = nullptr;
|
||||
static destroyProfileSectionFunction destroySectionCallee = nullptr;
|
||||
|
||||
static profileEventFunction profileEventCallee = nullptr;
|
||||
|
||||
// Copy at most 64 bytes of space_name into the handle's name field.
//
// strncpy zero-pads short sources but writes no terminator when the source
// holds 64 or more characters, which would leave `name` unterminated for any
// consumer that reads it as a C string.  The last of the 64 bytes is therefore
// always forced to '\0', truncating over-long names to 63 characters.
SpaceHandle::SpaceHandle(const char* space_name) {
  strncpy(name,space_name,64);
  name[63] = '\0';
}
|
||||
@ -162,6 +169,37 @@ void endDeepCopy() {
|
||||
}
|
||||
}
|
||||
|
||||
void createProfileSection(const std::string& sectionName, uint32_t* secID) {
|
||||
|
||||
if(nullptr != createSectionCallee) {
|
||||
(*createSectionCallee)(sectionName.c_str(), secID);
|
||||
}
|
||||
}
|
||||
|
||||
void startSection(const uint32_t secID) {
|
||||
if(nullptr != startSectionCallee) {
|
||||
(*startSectionCallee)(secID);
|
||||
}
|
||||
}
|
||||
|
||||
void stopSection(const uint32_t secID) {
|
||||
if(nullptr != stopSectionCallee) {
|
||||
(*stopSectionCallee)(secID);
|
||||
}
|
||||
}
|
||||
|
||||
void destroyProfileSection(const uint32_t secID) {
|
||||
if(nullptr != destroySectionCallee) {
|
||||
(*destroySectionCallee)(secID);
|
||||
}
|
||||
}
|
||||
|
||||
void markEvent(const std::string& eventName) {
|
||||
if(nullptr != profileEventCallee) {
|
||||
(*profileEventCallee)(eventName.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
|
||||
// Make sure the initialize call happens only once
|
||||
@ -230,7 +268,18 @@ void initialize() {
|
||||
beginDeepCopyCallee = *((beginDeepCopyFunction*) &p13);
|
||||
auto p14 = dlsym(firstProfileLibrary, "kokkosp_end_deep_copy");
|
||||
endDeepCopyCallee = *((endDeepCopyFunction*) &p14);
|
||||
|
||||
|
||||
auto p15 = dlsym(firstProfileLibrary, "kokkosp_create_profile_section");
|
||||
createSectionCallee = *((createProfileSectionFunction*) &p15);
|
||||
auto p16 = dlsym(firstProfileLibrary, "kokkosp_start_profile_section");
|
||||
startSectionCallee = *((startProfileSectionFunction*) &p16);
|
||||
auto p17 = dlsym(firstProfileLibrary, "kokkosp_stop_profile_section");
|
||||
stopSectionCallee = *((stopProfileSectionFunction*) &p17);
|
||||
auto p18 = dlsym(firstProfileLibrary, "kokkosp_destroy_profile_section");
|
||||
destroySectionCallee = *((destroyProfileSectionFunction*) &p18);
|
||||
|
||||
auto p19 = dlsym(firstProfileLibrary, "kokkosp_profile_event");
|
||||
profileEventCallee = *((profileEventFunction*) &p19);
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,6 +323,13 @@ void finalize() {
|
||||
|
||||
beginDeepCopyCallee = nullptr;
|
||||
endDeepCopyCallee = nullptr;
|
||||
|
||||
createSectionCallee = nullptr;
|
||||
startSectionCallee = nullptr;
|
||||
stopSectionCallee = nullptr;
|
||||
destroySectionCallee = nullptr;
|
||||
|
||||
profileEventCallee = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,6 +45,7 @@
|
||||
#define KOKKOSP_INTERFACE_HPP
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
|
||||
#include <cstddef>
|
||||
@ -57,7 +58,7 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
|
||||
#define KOKKOSP_INTERFACE_VERSION 20150628
|
||||
#define KOKKOSP_INTERFACE_VERSION 20171029
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Profiling {
|
||||
@ -81,6 +82,13 @@ typedef void (*popFunction)();
|
||||
typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
|
||||
typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t);
|
||||
|
||||
typedef void (*createProfileSectionFunction)(const char*, uint32_t*);
|
||||
typedef void (*startProfileSectionFunction)(const uint32_t);
|
||||
typedef void (*stopProfileSectionFunction)(const uint32_t);
|
||||
typedef void (*destroyProfileSectionFunction)(const uint32_t);
|
||||
|
||||
typedef void (*profileEventFunction)(const char*);
|
||||
|
||||
typedef void (*beginDeepCopyFunction)(
|
||||
SpaceHandle, const char*, const void*,
|
||||
SpaceHandle, const char*, const void*,
|
||||
@ -99,6 +107,13 @@ void endParallelReduce(const uint64_t kernelID);
|
||||
void pushRegion(const std::string& kName);
|
||||
void popRegion();
|
||||
|
||||
void createProfileSection(const std::string& sectionName, uint32_t* secID);
|
||||
void startSection(const uint32_t secID);
|
||||
void stopSection(const uint32_t secID);
|
||||
void destroyProfileSection(const uint32_t secID);
|
||||
|
||||
// Report a named event to the loaded profiling tool.
// The name is taken by const reference so that this prototype matches the
// definition `markEvent(const std::string& eventName)`; a pointer parameter
// declares a different overload for which no definition exists.
void markEvent(const std::string& evName);
|
||||
|
||||
void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);
|
||||
void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size);
|
||||
|
||||
|
||||
@ -1,219 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#include <impl/Kokkos_Rendezvous.hpp>
|
||||
#include <impl/Kokkos_Spinwait.hpp>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* pattern for rendezvous
|
||||
*
|
||||
* if ( rendezvous() ) {
|
||||
* ... all other threads are still in team_rendezvous() ...
|
||||
* rendezvous_release();
|
||||
* ... all other threads are released from team_rendezvous() ...
|
||||
* }
|
||||
*/
|
||||
|
||||
int rendezvous( volatile int64_t * const buffer
|
||||
, int const size
|
||||
, int const rank
|
||||
, int const slow
|
||||
) noexcept
|
||||
{
|
||||
enum : int { shift_byte = 3 };
|
||||
enum : int { size_byte = ( 01 << shift_byte ) }; // == 8
|
||||
enum : int { mask_byte = size_byte - 1 };
|
||||
|
||||
enum : int { shift_mem_cycle = 2 };
|
||||
enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4
|
||||
enum : int { mask_mem_cycle = size_mem_cycle - 1 };
|
||||
|
||||
// Cycle step values: 1 <= step <= size_val_cycle
|
||||
// An odd multiple of memory cycle so that when a memory location
|
||||
// is reused it has a different value.
|
||||
// Must be representable within a single byte: size_val_cycle < 16
|
||||
|
||||
enum : int { size_val_cycle = 3 * size_mem_cycle };
|
||||
|
||||
// Requires:
|
||||
// Called by rank = [ 0 .. size )
|
||||
// buffer aligned to int64_t[4]
|
||||
|
||||
// A sequence of rendezvous uses four cycled locations in memory
|
||||
// and non-equal cycled synchronization values to
|
||||
// 1) prevent rendezvous from overtaking one another and
|
||||
// 2) give each spin wait location an int64_t[4] span
|
||||
// so that it has its own cache line.
|
||||
|
||||
const int64_t step = (buffer[0] % size_val_cycle ) + 1 ;
|
||||
|
||||
// The leading int64_t[4] span is for thread 0 to write
|
||||
// and all other threads to read spin-wait.
|
||||
// sync_offset is the index into this array for this step.
|
||||
|
||||
const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle + size_mem_cycle ;
|
||||
|
||||
if ( rank ) {
|
||||
|
||||
const int group_begin = rank << shift_byte ; // == rank * size_byte
|
||||
|
||||
if ( group_begin < size ) {
|
||||
|
||||
// This thread waits for threads
|
||||
// [ group_begin .. group_begin + 8 )
|
||||
// [ rank*8 .. rank*8 + 8 )
|
||||
// to write to their designated bytes.
|
||||
|
||||
const int end = group_begin + size_byte < size
|
||||
? size_byte : size - group_begin ;
|
||||
|
||||
int64_t value = 0;
|
||||
for ( int i = 0 ; i < end ; ++i ) {
|
||||
value |= step << (i * size_byte );
|
||||
}
|
||||
|
||||
store_fence(); // This should not be needed but fixes #742
|
||||
|
||||
if ( slow ) {
|
||||
yield_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
|
||||
, value );
|
||||
}
|
||||
else {
|
||||
spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ]
|
||||
, value );
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// This thread sets its designated byte.
|
||||
// ( rank % size_byte ) +
|
||||
// ( ( rank / size_byte ) * size_byte * size_mem_cycle ) +
|
||||
// ( sync_offset * size_byte )
|
||||
int offset = ( rank & mask_byte )
|
||||
+ ( ( rank & ~mask_byte ) << shift_mem_cycle )
|
||||
+ ( sync_offset << shift_byte );
|
||||
|
||||
// Switch designated byte if running on big endian machine
|
||||
volatile uint16_t value = 1;
|
||||
volatile uint8_t* byte = (uint8_t*) &value;
|
||||
volatile bool is_big_endian = (!(byte[0] == 1));
|
||||
if (is_big_endian) {
|
||||
int remainder = ((offset) % 8);
|
||||
int base = offset - remainder;
|
||||
int shift = 7 - remainder;
|
||||
offset = base + shift;
|
||||
}
|
||||
|
||||
// All of this thread's previous memory stores must be complete before
|
||||
// this thread stores the step value at this thread's designated byte
|
||||
// in the shared synchronization array.
|
||||
|
||||
Kokkos::memory_fence();
|
||||
|
||||
((volatile int8_t*) buffer)[ offset ] = int8_t( step );
|
||||
|
||||
// Memory fence to push the previous store out
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
// Wait for thread 0 to release all other threads
|
||||
|
||||
if ( slow ) {
|
||||
yield_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) );
|
||||
}
|
||||
else {
|
||||
spinwait_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) );
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Thread 0 waits for threads [1..7]
|
||||
// to write to their designated bytes.
|
||||
|
||||
const int end = size_byte < size ? 8 : size ;
|
||||
|
||||
int64_t value = 0;
|
||||
for ( int i = 1 ; i < end ; ++i ) {
|
||||
value |= step << (i * size_byte );
|
||||
}
|
||||
|
||||
if ( slow ) {
|
||||
yield_until_equal( buffer[ sync_offset ], value );
|
||||
}
|
||||
else {
|
||||
spinwait_until_equal( buffer[ sync_offset ], value );
|
||||
}
|
||||
}
|
||||
|
||||
return rank ? 0 : 1 ;
|
||||
}
|
||||
|
||||
void rendezvous_release( volatile int64_t * const buffer ) noexcept
|
||||
{
|
||||
enum : int { shift_mem_cycle = 2 };
|
||||
enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4
|
||||
enum : int { mask_mem_cycle = size_mem_cycle - 1 };
|
||||
enum : int { size_val_cycle = 3 * size_mem_cycle };
|
||||
|
||||
// Requires:
|
||||
// Called after team_rendezvous
|
||||
// Called only by true == team_rendezvous(root)
|
||||
|
||||
// update step
|
||||
const int64_t step = (buffer[0] % size_val_cycle ) + 1;
|
||||
buffer[0] = step;
|
||||
|
||||
// Memory fence to be sure all previous writes are complete:
|
||||
Kokkos::memory_fence();
|
||||
|
||||
buffer[ (step & mask_mem_cycle) + size_mem_cycle ] = step;
|
||||
|
||||
// Memory fence to push the store out
|
||||
Kokkos::memory_fence();
|
||||
}
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
@ -1,87 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_IMPL_RENDEZVOUS_HPP
|
||||
#define KOKKOS_IMPL_RENDEZVOUS_HPP
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
// Number of int64_t entries a rendezvous buffer needs for `max_members`
// participants: four entries per started group of eight members, plus
// eight further entries.
inline
constexpr int rendezvous_buffer_size( int max_members ) noexcept
{
  return 8 + 4 * ( ( max_members + 7 ) / 8 );
}
|
||||
|
||||
/** \brief Thread pool rendezvous
|
||||
*
|
||||
* Rendezvous pattern:
|
||||
* if ( rendezvous(root) ) {
|
||||
* ... only root thread here while all others wait ...
|
||||
* rendezvous_release();
|
||||
* }
|
||||
* else {
|
||||
* ... all other threads release here ...
|
||||
* }
|
||||
*
|
||||
* Requires: buffer[ rendezvous_buffer_size( max_threads ) ];
|
||||
*
|
||||
* When slow != 0 the expectation is thread arrival will be
|
||||
* slow so the threads that arrive early should quickly yield
|
||||
* their core to the runtime thus possibly allowing the late
|
||||
* arriving threads to have more resources
|
||||
* (e.g., power and clock frequency).
|
||||
*/
|
||||
int rendezvous( volatile int64_t * const buffer
|
||||
, int const size
|
||||
, int const rank
|
||||
, int const slow = 0 ) noexcept ;
|
||||
|
||||
void rendezvous_release( volatile int64_t * const buffer ) noexcept ;
|
||||
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_IMPL_RENDEZVOUS_HPP
|
||||
|
||||
@ -60,6 +60,8 @@ namespace {
|
||||
|
||||
HostThreadTeamData g_serial_thread_team_data ;
|
||||
|
||||
bool g_serial_is_initialized = false;
|
||||
|
||||
}
|
||||
|
||||
// Resize thread team data scratch memory
|
||||
@ -136,9 +138,9 @@ HostThreadTeamData * serial_get_thread_team_data()
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
int Serial::is_initialized()
|
||||
bool Serial::is_initialized()
|
||||
{
|
||||
return 1 ;
|
||||
return Impl::g_serial_is_initialized ;
|
||||
}
|
||||
|
||||
void Serial::initialize( unsigned threads_count
|
||||
@ -158,6 +160,8 @@ void Serial::initialize( unsigned threads_count
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
Kokkos::Profiling::initialize();
|
||||
#endif
|
||||
|
||||
Impl::g_serial_is_initialized = true;
|
||||
}
|
||||
|
||||
void Serial::finalize()
|
||||
@ -177,6 +181,8 @@ void Serial::finalize()
|
||||
#if defined(KOKKOS_ENABLE_PROFILING)
|
||||
Kokkos::Profiling::finalize();
|
||||
#endif
|
||||
|
||||
Impl::g_serial_is_initialized = false;
|
||||
}
|
||||
|
||||
const char* Serial::name() { return "Serial"; }
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <impl/Kokkos_BitOps.hpp>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
@ -435,21 +436,12 @@ struct power_of_two<1,true>
|
||||
/** \brief If power of two then return power,
|
||||
* otherwise return ~0u.
|
||||
*/
|
||||
static KOKKOS_FORCEINLINE_FUNCTION
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned power_of_two_if_valid( const unsigned N )
|
||||
{
|
||||
unsigned p = ~0u ;
|
||||
if ( N && ! ( N & ( N - 1 ) ) ) {
|
||||
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
p = __ffs(N) - 1 ;
|
||||
#elif defined( __GNUC__ ) || defined( __GNUG__ )
|
||||
p = __builtin_ffs(N) - 1 ;
|
||||
#elif defined( __INTEL_COMPILER )
|
||||
p = _bit_scan_forward(N);
|
||||
#else
|
||||
p = 0 ;
|
||||
for ( unsigned j = 1 ; ! ( N & j ) ; j <<= 1 ) { ++p ; }
|
||||
#endif
|
||||
if ( is_integral_power_of_two ( N ) ) {
|
||||
p = bit_scan_forward ( N ) ;
|
||||
}
|
||||
return p ;
|
||||
}
|
||||
|
||||
@ -144,9 +144,9 @@ public:
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_FUNCTION_DEFAULTED ~ViewOffset() = default ;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset() = default ;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default ;
|
||||
KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default ;
|
||||
KOKKOS_FUNCTION_DEFAULTED ViewOffset() = default ;
|
||||
KOKKOS_FUNCTION_DEFAULTED ViewOffset( const ViewOffset & ) = default ;
|
||||
KOKKOS_FUNCTION_DEFAULTED ViewOffset & operator = ( const ViewOffset & ) = default ;
|
||||
|
||||
template< unsigned TrivialScalarSize >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -2,7 +2,16 @@
|
||||
# Add test-only library for gtest to be reused by all the subpackages
|
||||
#
|
||||
|
||||
# Outside of Trilinos, choose the Kokkos library that the unit tests list in
# TESTONLYLIBS: `kokkoscore` when KOKKOS_SEPARATE_LIBS is on, otherwise
# `kokkos`. Inside Trilinos TEST_LINK_TARGETS is not set here.
if(NOT KOKKOS_HAS_TRILINOS)
  set(TEST_LINK_TARGETS kokkos)
  if(KOKKOS_SEPARATE_LIBS)
    set(TEST_LINK_TARGETS kokkoscore)
  endif()
endif()
|
||||
|
||||
SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0")
|
||||
|
||||
INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR})
|
||||
TRIBITS_ADD_LIBRARY(
|
||||
@ -63,7 +72,7 @@ IF(Kokkos_ENABLE_Serial)
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -111,7 +120,7 @@ IF(Kokkos_ENABLE_Pthread)
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -160,7 +169,7 @@ IF(Kokkos_ENABLE_OpenMP)
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -194,7 +203,7 @@ IF(Kokkos_ENABLE_Qthreads)
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -251,10 +260,11 @@ IF(Kokkos_ENABLE_Cuda)
|
||||
cuda/TestCuda_ViewOfClass.cpp
|
||||
cuda/TestCuda_Crs.cpp
|
||||
cuda/TestCuda_WorkGraph.cpp
|
||||
cuda/TestCuda_UniqueToken.cpp
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
@ -271,7 +281,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
|
||||
foreach(INITTESTS_NUM RANGE 1 16)
|
||||
@ -281,7 +291,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
endforeach(INITTESTS_NUM)
|
||||
|
||||
@ -291,5 +301,5 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS}
|
||||
)
|
||||
|
||||
@ -27,7 +27,8 @@ endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
LDFLAGS ?=
|
||||
override LDFLAGS += -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
@ -329,7 +330,7 @@ KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HWLOC
|
||||
|
||||
KokkosCore_UnitTest_AllocationTracker: $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $( $(KOKKOS_LDFLAGS) $(LDFLAGS)LIB) -o KokkosCore_UnitTest_AllocationTracker
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(LIB) -o KokkosCore_UnitTest_AllocationTracker
|
||||
|
||||
KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_DEFAULT) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Default
|
||||
|
||||
@ -475,6 +475,8 @@ public:
|
||||
|
||||
namespace Test {
|
||||
|
||||
struct ReducerTag {};
|
||||
|
||||
template< class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace >
|
||||
struct TestReducers {
|
||||
struct SumFunctor {
|
||||
@ -590,6 +592,118 @@ struct TestReducers {
|
||||
}
|
||||
};
|
||||
|
||||
struct SumFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
value += values( i );
|
||||
}
|
||||
};
|
||||
|
||||
struct ProdFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
value *= values( i );
|
||||
}
|
||||
};
|
||||
|
||||
struct MinFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
if ( values( i ) < value ) value = values( i );
|
||||
}
|
||||
};
|
||||
|
||||
struct MaxFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
if ( values( i ) > value ) value = values( i );
|
||||
}
|
||||
};
|
||||
|
||||
struct MinLocFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type & value ) const {
|
||||
if ( values( i ) < value.val ) {
|
||||
value.val = values( i );
|
||||
value.loc = i;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct MaxLocFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type & value ) const {
|
||||
if ( values( i ) > value.val ) {
|
||||
value.val = values( i );
|
||||
value.loc = i;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct MinMaxLocFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type & value ) const {
|
||||
if ( values( i ) > value.max_val ) {
|
||||
value.max_val = values( i );
|
||||
value.max_loc = i;
|
||||
}
|
||||
|
||||
if ( values( i ) < value.min_val ) {
|
||||
value.min_val = values( i );
|
||||
value.min_loc = i;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BAndFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
value = value & values( i );
|
||||
}
|
||||
};
|
||||
|
||||
struct BOrFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
value = value | values( i );
|
||||
}
|
||||
};
|
||||
|
||||
struct LAndFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
value = value && values( i );
|
||||
}
|
||||
};
|
||||
|
||||
struct LOrFunctorTag {
|
||||
Kokkos::View< const Scalar*, ExecSpace > values;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const ReducerTag, const int & i, Scalar & value ) const {
|
||||
value = value || values( i );
|
||||
}
|
||||
};
|
||||
static void test_sum( int N ) {
|
||||
Kokkos::View< Scalar*, ExecSpace > values( "Values", N );
|
||||
auto h_values = Kokkos::create_mirror_view( values );
|
||||
@ -603,13 +717,19 @@ struct TestReducers {
|
||||
|
||||
SumFunctor f;
|
||||
f.values = values;
|
||||
SumFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = 0;
|
||||
|
||||
{
|
||||
Scalar sum_scalar = init;
|
||||
Kokkos::Experimental::Sum< Scalar > reducer_scalar( sum_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
ASSERT_EQ( sum_scalar, reference_sum );
|
||||
|
||||
sum_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( sum_scalar, reference_sum );
|
||||
|
||||
Scalar sum_scalar_view = reducer_scalar.reference();
|
||||
@ -643,13 +763,19 @@ struct TestReducers {
|
||||
|
||||
ProdFunctor f;
|
||||
f.values = values;
|
||||
ProdFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = 1;
|
||||
|
||||
{
|
||||
Scalar prod_scalar = init;
|
||||
Kokkos::Experimental::Prod< Scalar > reducer_scalar( prod_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
ASSERT_EQ( prod_scalar, reference_prod );
|
||||
|
||||
prod_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( prod_scalar, reference_prod );
|
||||
|
||||
Scalar prod_scalar_view = reducer_scalar.reference();
|
||||
@ -684,13 +810,19 @@ struct TestReducers {
|
||||
|
||||
MinFunctor f;
|
||||
f.values = values;
|
||||
MinFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = std::numeric_limits< Scalar >::max();
|
||||
|
||||
{
|
||||
Scalar min_scalar = init;
|
||||
Kokkos::Experimental::Min< Scalar > reducer_scalar( min_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( min_scalar, reference_min );
|
||||
|
||||
min_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( min_scalar, reference_min );
|
||||
|
||||
Scalar min_scalar_view = reducer_scalar.reference();
|
||||
@ -725,13 +857,19 @@ struct TestReducers {
|
||||
|
||||
MaxFunctor f;
|
||||
f.values = values;
|
||||
MaxFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = std::numeric_limits< Scalar >::min();
|
||||
|
||||
{
|
||||
Scalar max_scalar = init;
|
||||
Kokkos::Experimental::Max< Scalar > reducer_scalar( max_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( max_scalar, reference_max );
|
||||
|
||||
max_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( max_scalar, reference_max );
|
||||
|
||||
Scalar max_scalar_view = reducer_scalar.reference();
|
||||
@ -776,12 +914,19 @@ struct TestReducers {
|
||||
|
||||
MinLocFunctor f;
|
||||
f.values = values;
|
||||
MinLocFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
|
||||
{
|
||||
value_type min_scalar;
|
||||
Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar( min_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( min_scalar.val, reference_min );
|
||||
ASSERT_EQ( min_scalar.loc, reference_loc );
|
||||
|
||||
min_scalar = value_type();
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( min_scalar.val, reference_min );
|
||||
ASSERT_EQ( min_scalar.loc, reference_loc );
|
||||
|
||||
@ -829,12 +974,19 @@ struct TestReducers {
|
||||
|
||||
MaxLocFunctor f;
|
||||
f.values = values;
|
||||
MaxLocFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
|
||||
{
|
||||
value_type max_scalar;
|
||||
Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar( max_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( max_scalar.val, reference_max );
|
||||
ASSERT_EQ( max_scalar.loc, reference_loc );
|
||||
|
||||
max_scalar = value_type();
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( max_scalar.val, reference_max );
|
||||
ASSERT_EQ( max_scalar.loc, reference_loc );
|
||||
|
||||
@ -898,12 +1050,35 @@ struct TestReducers {
|
||||
|
||||
MinMaxLocFunctor f;
|
||||
f.values = values;
|
||||
MinMaxLocFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
|
||||
{
|
||||
value_type minmax_scalar;
|
||||
Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar( minmax_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( minmax_scalar.min_val, reference_min );
|
||||
|
||||
for ( int i = 0; i < N; i++ ) {
|
||||
if ( ( i == minmax_scalar.min_loc ) && ( h_values( i ) == reference_min ) ) {
|
||||
reference_minloc = i;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ( minmax_scalar.min_loc, reference_minloc );
|
||||
ASSERT_EQ( minmax_scalar.max_val, reference_max );
|
||||
|
||||
for ( int i = 0; i < N; i++ ) {
|
||||
if ( ( i == minmax_scalar.max_loc ) && ( h_values( i ) == reference_max ) ) {
|
||||
reference_maxloc = i;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ( minmax_scalar.max_loc, reference_maxloc );
|
||||
|
||||
minmax_scalar = value_type();
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( minmax_scalar.min_val, reference_min );
|
||||
|
||||
for ( int i = 0; i < N; i++ ) {
|
||||
@ -962,14 +1137,21 @@ struct TestReducers {
|
||||
|
||||
BAndFunctor f;
|
||||
f.values = values;
|
||||
BAndFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = Scalar() | ( ~Scalar() );
|
||||
|
||||
{
|
||||
Scalar band_scalar = init;
|
||||
Kokkos::Experimental::BAnd< Scalar > reducer_scalar( band_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( band_scalar, reference_band );
|
||||
|
||||
band_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( band_scalar, reference_band );
|
||||
|
||||
Scalar band_scalar_view = reducer_scalar.reference();
|
||||
|
||||
ASSERT_EQ( band_scalar_view, reference_band );
|
||||
@ -1002,13 +1184,19 @@ struct TestReducers {
|
||||
|
||||
BOrFunctor f;
|
||||
f.values = values;
|
||||
BOrFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = Scalar() & ( ~Scalar() );
|
||||
|
||||
{
|
||||
Scalar bor_scalar = init;
|
||||
Kokkos::Experimental::BOr< Scalar > reducer_scalar( bor_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( bor_scalar, reference_bor );
|
||||
|
||||
bor_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( bor_scalar, reference_bor );
|
||||
|
||||
Scalar bor_scalar_view = reducer_scalar.reference();
|
||||
@ -1042,13 +1230,19 @@ struct TestReducers {
|
||||
|
||||
LAndFunctor f;
|
||||
f.values = values;
|
||||
LAndFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = 1;
|
||||
|
||||
{
|
||||
Scalar land_scalar = init;
|
||||
Kokkos::Experimental::LAnd< Scalar > reducer_scalar( land_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( land_scalar, reference_land );
|
||||
|
||||
land_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( land_scalar, reference_land );
|
||||
|
||||
Scalar land_scalar_view = reducer_scalar.reference();
|
||||
@ -1082,13 +1276,19 @@ struct TestReducers {
|
||||
|
||||
LOrFunctor f;
|
||||
f.values = values;
|
||||
LOrFunctorTag f_tag;
|
||||
f_tag.values = values;
|
||||
Scalar init = 0;
|
||||
|
||||
{
|
||||
Scalar lor_scalar = init;
|
||||
Kokkos::Experimental::LOr< Scalar > reducer_scalar( lor_scalar );
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar );
|
||||
ASSERT_EQ( lor_scalar, reference_lor );
|
||||
|
||||
lor_scalar = init;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar );
|
||||
ASSERT_EQ( lor_scalar, reference_lor );
|
||||
|
||||
Scalar lor_scalar_view = reducer_scalar.reference();
|
||||
|
||||
@ -46,6 +46,7 @@
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
|
||||
namespace TestTeamVector {
|
||||
|
||||
@ -840,7 +841,8 @@ public:
|
||||
const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols;
|
||||
|
||||
if ( int64_t(solution) != int64_t(result) ) {
|
||||
printf( " TestTripleNestedReduce failed solution(%ld) != result(%ld), nrows(%d) ncols(%d) league_size(%d) team_size(%d)\n"
|
||||
printf( " TestTripleNestedReduce failed solution(%" PRId64 ") != result(%" PRId64 "),"
|
||||
" nrows(%" PRId32 ") ncols(%" PRId32 ") league_size(%" PRId32 ") team_size(%" PRId32 ")\n"
|
||||
, int64_t(solution)
|
||||
, int64_t(result)
|
||||
, int32_t(nrows)
|
||||
|
||||
@ -79,14 +79,18 @@ struct TestViewMappingSubview
|
||||
typedef Kokkos::View< int***[13][14], Kokkos::LayoutLeft, ExecSpace > DLT;
|
||||
typedef Kokkos::Subview< DLT, range, int, int, int, int > DLS1;
|
||||
|
||||
#if !defined(KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND)
|
||||
static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout, Kokkos::LayoutLeft >::value
|
||||
, "Subview layout error for rank 1 subview of left-most range of LayoutLeft" );
|
||||
#endif
|
||||
|
||||
typedef Kokkos::View< int***[13][14], Kokkos::LayoutRight, ExecSpace > DRT;
|
||||
typedef Kokkos::Subview< DRT, int, int, int, int, range > DRS1;
|
||||
|
||||
#if !defined(KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND)
|
||||
static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout, Kokkos::LayoutRight >::value
|
||||
, "Subview layout error for rank 1 subview of right-most range of LayoutRight" );
|
||||
#endif
|
||||
|
||||
AT Aa;
|
||||
AS Ab;
|
||||
|
||||
52
lib/kokkos/core/unit_test/UnitTestConfig.make
Normal file
52
lib/kokkos/core/unit_test/UnitTestConfig.make
Normal file
@ -0,0 +1,52 @@
|
||||
# Configuration self-test driver: for each architecture/device pair listed
# below, the `test` target regenerates the Kokkos settings via ../src/Makefile
# and checks the results with testmake.sh and diffconfig.sh.
KOKKOS_PATH = ../..
|
||||
|
||||
# See $(KOKKOS_PATH)/Makefile.kokkos and $(KOKKOS_PATH)/generate_makefile.bash
|
||||
# NOTE(review): the double quotes are part of the make variable's value, so
# "$(KOKKOS_ARCH_OPTIONS)" in the recipe expands to ""None ... Pascal61"" and
# the shell appears to split it into one word per option -- confirm before
# changing the quoting here or in the recipe.
KOKKOS_ARCH_OPTIONS="None AMDAVX ARMv80 ARMv81 ARMv8-ThunderX \
|
||||
BGQ Power7 Power8 Power9 \
|
||||
WSM SNB HSW BDW SKX KNC KNL \
|
||||
Kepler Kepler30 Kepler32 Kepler35 Kepler37 \
|
||||
Maxwell Maxwell50 Maxwell52 Maxwell53 Pascal60 Pascal61"
|
||||
#KOKKOS_ARCH_OPTIONS="AMDAVX"
|
||||
|
||||
KOKKOS_DEVICE_OPTIONS="Cuda ROCm OpenMP Pthread Serial Qthreads"
|
||||
#KOKKOS_DEVICE_OPTIONS="Cuda"
|
||||
|
||||
# Configure paths to enable environment query in Makefile.kokkos to work
|
||||
ROCM_HCC_PATH="config"
|
||||
CXX="./config/cxx"
|
||||
# Command prefix: runs the following command under env with CXX and
# ROCM_HCC_PATH set as above and ./config prepended to PATH.
ipath=env CXX=$(CXX) env PATH=./config:$$PATH env ROCM_HCC_PATH=$(ROCM_HCC_PATH)
|
||||
|
||||
# Defined in core/src/Makefile -- this should be consistent
|
||||
KOKKOS_MAKEFILE=Makefile.kokkos
|
||||
KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake
|
||||
|
||||
# Defined in Makefile.kokkos -- this should be consistent
|
||||
KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp
|
||||
KOKKOS_CONFIG_HEADER=KokkosCore_config.h
|
||||
|
||||
# '#' for use inside the recipe text below; the backslash keeps make from
# treating it as the start of a comment.
d='\#'
|
||||
|
||||
# diff => 0 is no difference. if => 0 is false
|
||||
# $(call testmake,<file>,<arch>,<device>) / $(call testconf,<file>): print
# "OK # <file>" when the helper script outputs exactly Passed, otherwise
# "not OK # <file>".
testmake=if test "`testmake.sh $1 $2 $3`" = 'Passed'; then echo OK $d $1; else echo not OK $d $1; fi
|
||||
testconf=if test "`diffconfig.sh $1`" = 'Passed'; then echo OK $d $1; else echo not OK $d $1; fi
|
||||
|
||||
# testing tmp and cmakefile files is unnecessary here
|
||||
# For every pair: generate via build-makefile-cmake-kokkos, delete the tmp and
# cmake settings files, move Makefile.kokkos and KokkosCore_config.h into
# config/tmpstore/ under an <arch>_<device>_ prefix, then check both.
test:
|
||||
@for karch in "$(KOKKOS_ARCH_OPTIONS)"; do \
|
||||
for device in "$(KOKKOS_DEVICE_OPTIONS)"; do \
|
||||
$(ipath) KOKKOS_DEVICES=$$device KOKKOS_ARCH=$$karch make -e -f ../src/Makefile build-makefile-cmake-kokkos; \
|
||||
rm -f $(KOKKOS_INTERNAL_CONFIG_TMP) $(KOKKOS_CMAKEFILE); \
|
||||
prfx="$$karch"_"$$device"_; \
|
||||
newmake="$$prfx"$(KOKKOS_MAKEFILE); \
|
||||
newconf="$$prfx"$(KOKKOS_CONFIG_HEADER); \
|
||||
mv $(KOKKOS_MAKEFILE) config/tmpstore/$$newmake; \
|
||||
mv $(KOKKOS_CONFIG_HEADER) config/tmpstore/$$newconf; \
|
||||
$(call testmake,$$newmake,$$karch,$$device); \
|
||||
$(call testconf,$$newconf); \
|
||||
done; \
|
||||
done
|
||||
|
||||
# Run the CMake variant of these checks: configure config/cmaketest in place
# and invoke its `test` target.
test-cmake:
|
||||
@cd config/cmaketest; \
|
||||
cmake . ; \
|
||||
make test
|
||||
2
lib/kokkos/core/unit_test/config/bin/hcc-config
Executable file
2
lib/kokkos/core/unit_test/config/bin/hcc-config
Executable file
@ -0,0 +1,2 @@
|
||||
#!/bin/sh
|
||||
# Stand-in hcc-config for the configuration tests: prints fixed placeholder flags.
echo "--foo --bar"
|
||||
5
lib/kokkos/core/unit_test/config/clang
Executable file
5
lib/kokkos/core/unit_test/config/clang
Executable file
@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
# Stand-in clang for the configuration tests: whatever the arguments, it
# prints a fixed Apple LLVM 8.1.0 version banner. Each line must be an echo
# command; `echo="..."` would only assign a shell variable and print nothing.
echo "Apple LLVM version 8.1.0 (clang-802.0.42)"
|
||||
echo "Target: x86_64-apple-darwin16.7.0"
|
||||
echo "Thread model: posix"
|
||||
echo "InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin"
|
||||
80
lib/kokkos/core/unit_test/config/cmaketest/CMakeLists.txt
Normal file
80
lib/kokkos/core/unit_test/config/cmaketest/CMakeLists.txt
Normal file
@ -0,0 +1,80 @@
|
||||
# Configuration self-test: for every host architecture / device pair, run the
|
||||
# build-makefile-cmake-kokkos target of core/src/Makefile and register CTest
|
||||
# checks (testmake.sh, diffconfig.sh) on the files it generates.
|
||||
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
|
||||
project(Kokkos CXX)
|
||||
|
||||
enable_testing()
|
||||
|
||||
# Initialization
|
||||
# Paths are resolved from this file's own directory (not the top of the whole
|
||||
# build) so they stay correct if this directory is not the top-level project.
|
||||
get_filename_component(KOKKOS_TESTDIR "${CMAKE_CURRENT_SOURCE_DIR}/../.." REALPATH)
|
||||
get_filename_component(KOKKOS_SRCDIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../.." REALPATH)
|
||||
set(KOKKOS_SRC_PATH "${KOKKOS_SRCDIR}")
|
||||
set(KOKKOS_PATH "${KOKKOS_SRC_PATH}")
|
||||
|
||||
set(CXX "${KOKKOS_TESTDIR}/config/cxx")
|
||||
|
||||
# Defined in core/src/Makefile -- this should be consistent
|
||||
set(KOKKOS_MAKEFILE Makefile.kokkos)
|
||||
set(KOKKOS_CMAKEFILE kokkos_generated_settings.cmake)
|
||||
|
||||
# Defined in Makefile.kokkos -- this should be consistent
|
||||
set(KOKKOS_INTERNAL_CONFIG_TMP KokkosCore_config.tmp)
|
||||
set(KOKKOS_CONFIG_HEADER KokkosCore_config.h)
|
||||
|
||||
# Quiet first pass over the options file.
|
||||
# NOTE(review): KOKKOS_DEVICES_LIST and KOKKOS_HOST_ARCH_LIST are not set in
|
||||
# this file, so they presumably come from kokkos_options.cmake -- confirm.
|
||||
set(KOKKOS_CMAKE_VERBOSE False)
|
||||
include("${KOKKOS_SRCDIR}/cmake/kokkos_options.cmake")
|
||||
foreach(KOKKOS_DEV ${KOKKOS_DEVICES_LIST})
|
||||
  # Do some initialization: Want to turn everything off for testing
|
||||
  string(TOUPPER "${KOKKOS_DEV}" KOKKOS_DEVUC)
|
||||
  set(KOKKOS_ENABLE_${KOKKOS_DEVUC} OFF)
|
||||
endforeach()
|
||||
|
||||
|
||||
#TEST set(KOKKOS_HOST_ARCH_LIST ARMv80)
|
||||
#TEST set(KOKKOS_DEVICES_LIST Cuda)
|
||||
#set(KOKKOS_HOST_ARCH_LIST AMDAVX)
|
||||
#set(KOKKOS_DEVICES_LIST Cuda)
|
||||
|
||||
foreach(KOKKOS_HOST_ARCH ${KOKKOS_HOST_ARCH_LIST})
|
||||
  foreach(KOKKOS_DEV ${KOKKOS_DEVICES_LIST})
|
||||
    # Switch on the device under test; it is switched off again at the end
|
||||
    # of this iteration.
|
||||
    string(TOUPPER "${KOKKOS_DEV}" KOKKOS_DEVUC)
|
||||
    set(KOKKOS_ENABLE_${KOKKOS_DEVUC} On)
|
||||
|
||||
    set(KOKKOS_CMAKE_VERBOSE True)
|
||||
    include("${KOKKOS_SRCDIR}/cmake/kokkos_options.cmake")
|
||||
    set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "ROCM_HCC_PATH=${KOKKOS_TESTDIR}/config")
|
||||
|
||||
    #message(STATUS "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRCDIR}/core/src/Makefile build-makefile-cmake-kokkos")
|
||||
    # KOKKOS_SETTINGS is deliberately left unquoted: each list element must
|
||||
    # become its own command-line argument.
|
||||
    execute_process(
|
||||
      COMMAND ${KOKKOS_SETTINGS} make -f "${KOKKOS_SRCDIR}/core/src/Makefile" build-makefile-cmake-kokkos
|
||||
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
      OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/core_src_make.out"
|
||||
      RESULT_VARIABLE res
|
||||
    )
|
||||
    # Surface a failed generation step instead of silently ignoring it; only a
|
||||
    # warning, so configuration proceeds exactly as before.
|
||||
    if(NOT "${res}" STREQUAL "0")
|
||||
      message(WARNING "build-makefile-cmake-kokkos failed for ${KOKKOS_HOST_ARCH}/${KOKKOS_DEV} (result: ${res}); see ${CMAKE_CURRENT_BINARY_DIR}/core_src_make.out")
|
||||
    endif()
|
||||
    #message(STATUS "RESULT ${res}")
|
||||
|
||||
    # The generated files land in the working directory used above; drop the
|
||||
    # ones that are not checked and give the rest an <arch>_<device>_ prefix.
|
||||
    file(REMOVE
|
||||
      "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_INTERNAL_CONFIG_TMP}"
|
||||
      "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_MAKEFILE}"
|
||||
    )
|
||||
    set(PREFIX "${KOKKOS_HOST_ARCH}_${KOKKOS_DEV}_")
|
||||
    set(NEWCMAKE ${PREFIX}${KOKKOS_CMAKEFILE})
|
||||
    set(NEWCONFH ${PREFIX}${KOKKOS_CONFIG_HEADER})
|
||||
    file(RENAME "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_CMAKEFILE}" "${CMAKE_CURRENT_SOURCE_DIR}/${NEWCMAKE}")
|
||||
    file(RENAME "${CMAKE_CURRENT_SOURCE_DIR}/${KOKKOS_CONFIG_HEADER}" "${CMAKE_CURRENT_SOURCE_DIR}/${NEWCONFH}")
|
||||
|
||||
    # Each check passes when the helper script prints "Passed".
|
||||
    add_test(NAME ${NEWCMAKE}-test
|
||||
      COMMAND "${KOKKOS_TESTDIR}/testmake.sh" ${NEWCMAKE} ${KOKKOS_HOST_ARCH} ${KOKKOS_DEV}
|
||||
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
    )
|
||||
    set_tests_properties(${NEWCMAKE}-test
|
||||
      PROPERTIES PASS_REGULAR_EXPRESSION Passed
|
||||
      TIMEOUT 15
|
||||
    )
|
||||
    add_test(NAME ${NEWCONFH}-test
|
||||
      COMMAND "${KOKKOS_TESTDIR}/diffconfig.sh" ${NEWCONFH}
|
||||
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
    )
|
||||
    set_tests_properties(${NEWCONFH}-test
|
||||
      PROPERTIES PASS_REGULAR_EXPRESSION Passed
|
||||
      TIMEOUT 15
|
||||
    )
|
||||
    set(KOKKOS_ENABLE_${KOKKOS_DEVUC} Off)
|
||||
|
||||
  endforeach()
|
||||
endforeach()
|
||||
5
lib/kokkos/core/unit_test/config/cxx
Executable file
5
lib/kokkos/core/unit_test/config/cxx
Executable file
@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
# Stand-in C++ compiler for the configuration tests: whatever the arguments,
# it prints a fixed g++ 6.3.1 version banner.
echo "g++ (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)"
|
||||
echo "Copyright (C) 2016 Free Software Foundation, Inc."
|
||||
echo "This is free software; see the source for copying conditions. There is NO"
|
||||
echo "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||
5
lib/kokkos/core/unit_test/config/mpic++
Executable file
5
lib/kokkos/core/unit_test/config/mpic++
Executable file
@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
# Stand-in mpic++ for the configuration tests: whatever the arguments, it
# prints a fixed g++ 6.3.1 version banner.
echo "g++ (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)"
|
||||
echo "Copyright (C) 2016 Free Software Foundation, Inc."
|
||||
echo "This is free software; see the source for copying conditions. There is NO"
|
||||
echo "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
|
||||
5
lib/kokkos/core/unit_test/config/nvcc
Executable file
5
lib/kokkos/core/unit_test/config/nvcc
Executable file
@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
# Stand-in nvcc for the configuration tests: whatever the arguments, it prints
# a fixed CUDA 8.0 (V8.0.61) version banner.
echo "nvcc: NVIDIA (R) Cuda compiler driver"
|
||||
echo "Copyright (c) 2005-2016 NVIDIA Corporation"
|
||||
echo "Built on Tue_Jan_10_13:22:03_CST_2017"
|
||||
echo "Cuda compilation tools, release 8.0, V8.0.61"
|
||||
@ -0,0 +1,18 @@
|
||||
/* ---------------------------------------------
|
||||
Makefile constructed configuration:
|
||||
Fri Sep 22 17:22:09 MDT 2017
|
||||
----------------------------------------------*/
|
||||
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
|
||||
#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
|
||||
#else
|
||||
#define KOKKOS_CORE_CONFIG_H
|
||||
#endif
|
||||
/* Execution Spaces */
|
||||
#define KOKKOS_HAVE_CUDA 1
|
||||
#define KOKKOS_HAVE_SERIAL 1
|
||||
/* General Settings */
|
||||
#define KOKKOS_HAVE_CXX11 1
|
||||
#define KOKKOS_ENABLE_PROFILING
|
||||
/* Optimization Settings */
|
||||
/* Cuda Settings */
|
||||
#define KOKKOS_ARCH_AVX 1
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user