diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index f45fc8d9fc..1219352f73 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -1,4 +1,15 @@ +IF(COMMAND TRIBITS_PACKAGE_DECL) + SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "") +ELSE() + SET(KOKKOS_HAS_TRILINOS OFF CACHE BOOL "") +ENDIF() + +IF(NOT KOKKOS_HAS_TRILINOS) + CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR) + INCLUDE(cmake/tribits.cmake) +ENDIF() + # # A) Forward delcare the package so that certain options are also defined for # subpackages @@ -12,7 +23,22 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) # subpackages as well. # -TRIBITS_ADD_DEBUG_OPTION() + + +# mfh 01 Aug 2016: See Issue #61: +# +# https://github.com/kokkos/kokkos/issues/61 +# +# Don't use TRIBITS_ADD_DEBUG_OPTION() here, because that defines +# HAVE_KOKKOS_DEBUG. We define KOKKOS_HAVE_DEBUG here instead, +# for compatibility with Kokkos' Makefile build system. + +TRIBITS_ADD_OPTION_AND_DEFINE( + ${PACKAGE_NAME}_ENABLE_DEBUG + ${PACKAGE_NAME_UC}_HAVE_DEBUG + "Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build." + ${${PROJECT_NAME}_ENABLE_DEBUG} +) TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_SIERRA_BUILD @@ -82,11 +108,33 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "${TPL_ENABLE_MPI}" ) +# Set default value of Kokkos_ENABLE_Debug_Bounds_Check option +# +# CMake is case sensitive. The Kokkos_ENABLE_Debug_Bounds_Check +# option (defined below) is annoyingly not all caps, but we need to +# keep it that way for backwards compatibility. If users forget and +# try using an all-caps variable, then make it count by using the +# all-caps version as the default value of the original, not-all-caps +# option. Otherwise, the default value of this option comes from +# Kokkos_ENABLE_DEBUG (see Issue #367). 
+ +ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_DEBUG) +IF(DEFINED Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) + IF(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT ON) + ELSE() + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") + ENDIF() +ELSE() + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") +ENDIF() +ASSERT_DEFINED(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT) + TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_Debug_Bounds_Check KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK - "Enable bounds checking support in Kokkos." - OFF + "Enable Kokkos::View run-time bounds checking." + "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}" ) TRIBITS_ADD_OPTION_AND_DEFINE( diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index c01ceaf64d..c9b6cc464d 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -7,7 +7,7 @@ CXXFLAGS=$(CCFLAGS) #Options: OpenMP,Serial,Pthreads,Cuda KOKKOS_DEVICES ?= "OpenMP" #KOKKOS_DEVICES ?= "Pthreads" -#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8,KNL +#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW KOKKOS_ARCH ?= "" #Options: yes,no KOKKOS_DEBUG ?= "no" @@ -97,6 +97,7 @@ KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) #NVIDIA based @@ -108,10 +109,12 @@ KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo 
$(KOKKOS_ARCH) | grep KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) @@ -123,6 +126,7 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) @@ -142,11 +146,11 @@ KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AM #Any AVX? 
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) # Decide what ISA level we are able to support -KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc )) @@ -304,8 +308,8 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -mcpu=power8 - KOKKOS_LDFLAGS += -mcpu=power8 + KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 + KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) @@ -321,8 +325,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) else # Assume that this is a really a GNU compiler - KOKKOS_CXXFLAGS += -march=core-avx2 - KOKKOS_LDFLAGS += -march=core-avx2 + KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 + KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 endif endif endif @@ -390,6 +394,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) KOKKOS_CXXFLAGS += -arch=sm_53 endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) + tmp := $(shell echo "\#define 
KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_61 +endif endif KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 876ae033b7..86929ea0fe 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -1,9 +1,5 @@ Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp -Kokkos_AllocationTracker.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp -Kokkos_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp @@ -20,6 +16,10 @@ Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Seria $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp +Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp +Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp @@ -32,12 +32,12 @@ Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_M $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) -Kokkos_Cuda_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp +Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp endif @@ -61,6 +61,8 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp +Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp endif Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp diff --git a/lib/kokkos/README b/lib/kokkos/README index 25b3778d95..b094578af6 100644 --- a/lib/kokkos/README +++ b/lib/kokkos/README @@ -37,7 +37,7 @@ hcedwar(at)sandia.gov and crtrott(at)sandia.gov ====Requirements============================================================ ============================================================================ -Primary tested compilers are: +Primary tested compilers on X86 are: GCC 4.7.2 GCC 4.8.4 GCC 4.9.2 @@ -48,26 +48,43 @@ Primary tested compilers are: Clang 3.5.2 Clang 3.6.1 +Primary tested compilers on Power 8 are: + IBM XL 13.1.3 (OpenMP,Serial) + GCC 4.9.2 (OpenMP,Serial) + GCC 5.3.0 (OpenMP,Serial) + Secondary tested compilers are: CUDA 6.5 (with gcc 4.7.2) CUDA 7.0 (with gcc 4.7.2) CUDA 7.5 (with gcc 4.8.4) Other compilers working: - PGI 15.4 - IBM XL 13.1.2 - Cygwin 2.1.0 64bit with gcc 4.9.3 + X86: + Intel 17.0.042 (the FENL example causes internal compiler error) + PGI 15.4 + Cygwin 2.1.0 64bit with gcc 4.9.3 + KNL: + Intel 16.2.181 (the FENL example causes internal compiler error) + Intel 17.0.042 (the FENL example causes internal compiler error) + +Known non-working combinations: + Power8: + GCC 6.1.0 + Pthreads backend + Primary tested compiler are passing in release mode -with warnings as errors. We are using the following set -of flags: +with warnings as errors. They also are tested with a comprehensive set of +backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...). 
+We are using the following set of flags: GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized Secondary compilers are passing without -Werror. -Other compilers are tested occasionally. +Other compilers are tested occasionally, in particular when pushing from develop to +master branch, without -Werror and only for a select set of backends. ============================================================================ ====Getting started========================================================= diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index 192b1d64f8..d7c06dc14b 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -771,6 +771,7 @@ namespace Kokkos { friend class Random_XorShift1024_Pool; public: + typedef Random_XorShift1024_Pool pool_type; typedef DeviceType device_type; enum {MAX_URAND = 0xffffffffU}; @@ -779,10 +780,10 @@ namespace Kokkos { enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (uint64_t* state, int p, int state_idx = 0): + Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): p_(p),state_idx_(state_idx){ for(int i=0 ; i<16; i++) - state_[i] = state[i]; + state_[i] = state(state_idx,i); } KOKKOS_INLINE_FUNCTION @@ -933,6 +934,7 @@ namespace Kokkos { state_data_type state_; int_view_type p_; int num_states_; + friend class Random_XorShift1024; public: typedef Random_XorShift1024 generator_type; @@ -1001,7 +1003,7 @@ namespace Kokkos { KOKKOS_INLINE_FUNCTION Random_XorShift1024 get_state() const { const int i = DeviceType::hardware_thread_id(); - return Random_XorShift1024(&state_(i,0),p_(i),i); + 
return Random_XorShift1024(state_,p_(i),i); }; KOKKOS_INLINE_FUNCTION @@ -1020,10 +1022,12 @@ namespace Kokkos { int p_; const int state_idx_; uint64_t* state_; + const int stride_; friend class Random_XorShift1024_Pool; public: typedef Kokkos::Cuda device_type; + typedef Random_XorShift1024_Pool pool_type; enum {MAX_URAND = 0xffffffffU}; enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; @@ -1031,30 +1035,30 @@ namespace Kokkos { enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (uint64_t* state, int p, int state_idx = 0): - p_(p),state_idx_(state_idx),state_(state){ + Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): + p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){ } KOKKOS_INLINE_FUNCTION uint32_t urand() { - uint64_t state_0 = state_[ p_ ]; - uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + uint64_t state_0 = state_[ p_ * stride_ ]; + uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; state_1 ^= state_1 << 31; state_1 ^= state_1 >> 11; state_0 ^= state_0 >> 30; - uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; + uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; tmp = tmp>>16; return static_cast(tmp&MAX_URAND); } KOKKOS_INLINE_FUNCTION uint64_t urand64() { - uint64_t state_0 = state_[ p_ ]; - uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + uint64_t state_0 = state_[ p_ * stride_ ]; + uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; state_1 ^= state_1 << 31; state_1 ^= state_1 >> 11; state_0 ^= state_0 >> 30; - return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; + return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; } KOKKOS_INLINE_FUNCTION @@ -1227,9 +1231,9 @@ Random_XorShift1024 Random_XorShift1024_Pool::get_st if(i>=num_states_) {i = i_offset;} } - return 
Random_XorShift1024(&state_(i,0), p_(i), i); + return Random_XorShift1024(state_, p_(i), i); #else - return Random_XorShift1024(&state_(0,0), p_(0), 0); + return Random_XorShift1024(state_, p_(0), 0); #endif } @@ -1248,14 +1252,15 @@ void Random_XorShift1024_Pool::free_state(const Random_XorShift102 #endif +namespace Impl { -template +template struct fill_random_functor_range; -template +template struct fill_random_functor_begin_end; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1268,19 +1273,19 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (const IndexType& i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) a(idx) = Rand::draw(gen,range); } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1293,12 +1298,12 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) a(idx,k) = Rand::draw(gen,range); } } @@ -1307,8 +1312,8 @@ struct fill_random_functor_range{ }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1321,13 +1326,13 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int 
i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) a(idx,k,l) = Rand::draw(gen,range); } } @@ -1335,8 +1340,8 @@ struct fill_random_functor_range{ } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1349,14 +1354,14 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) a(idx,k,l,m) = Rand::draw(gen,range); } } @@ -1364,8 +1369,8 @@ struct fill_random_functor_range{ } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1378,15 +1383,15 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) a(idx,k,l,m,n) = Rand::draw(gen,range); } } @@ -1394,8 +1399,8 @@ struct fill_random_functor_range{ } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename 
ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1408,16 +1413,16 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) a(idx,k,l,m,n,o) = Rand::draw(gen,range); } } @@ -1425,8 +1430,8 @@ struct fill_random_functor_range{ } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1439,17 +1444,17 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) a(idx,k,l,m,n,o,p) = Rand::draw(gen,range); } } @@ -1457,8 +1462,8 @@ struct fill_random_functor_range{ } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1471,26 +1476,26 @@ struct fill_random_functor_range{ a(a_),rand_pool(rand_pool_),range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { 
typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) + for(IndexType q=0;q(a.dimension_7());q++) a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1503,19 +1508,19 @@ struct fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) a(idx) = Rand::draw(gen,begin,end); } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1528,12 +1533,12 @@ struct fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) a(idx,k) = Rand::draw(gen,begin,end); } } @@ -1542,8 +1547,8 @@ struct fill_random_functor_begin_end{ }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1556,13 +1561,13 @@ struct 
fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) a(idx,k,l) = Rand::draw(gen,begin,end); } } @@ -1570,8 +1575,8 @@ struct fill_random_functor_begin_end{ } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1584,14 +1589,14 @@ struct fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) a(idx,k,l,m) = Rand::draw(gen,begin,end); } } @@ -1599,8 +1604,8 @@ struct fill_random_functor_begin_end{ } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1613,15 +1618,15 @@ struct fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())){ + for(IndexType l=0;l(a.dimension_1());l++) + for(IndexType m=0;m(a.dimension_2());m++) + for(IndexType n=0;n(a.dimension_3());n++) + for(IndexType o=0;o(a.dimension_4());o++) a(idx,l,m,n,o) = 
Rand::draw(gen,begin,end); } } @@ -1629,8 +1634,8 @@ struct fill_random_functor_begin_end{ } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1643,16 +1648,16 @@ struct fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end); } } @@ -1661,8 +1666,8 @@ struct fill_random_functor_begin_end{ }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1675,17 +1680,17 @@ struct fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end); } } @@ -1693,8 +1698,8 @@ struct fill_random_functor_begin_end{ } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end{ typedef typename 
ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; @@ -1707,18 +1712,18 @@ struct fill_random_functor_begin_end{ a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (unsigned int i) const { + void operator() (IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(unsigned int j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) + for(IndexType q=0;q(a.dimension_7());q++) a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end); } } @@ -1726,18 +1731,20 @@ struct fill_random_functor_begin_end{ } }; -template +} + +template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { int64_t LDA = a.dimension_0(); if(LDA>0) - parallel_for((LDA+127)/128,fill_random_functor_range(a,g,range)); + parallel_for((LDA+127)/128,Impl::fill_random_functor_range(a,g,range)); } -template +template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) { int64_t LDA = a.dimension_0(); if(LDA>0) - parallel_for((LDA+127)/128,fill_random_functor_begin_end(a,g,begin,end)); + parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end(a,g,begin,end)); } } diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index eade74ed93..c906b9f2cd 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -50,6 +50,7 @@ #include #include #include +#include namespace Test { @@ -207,7 +208,6 @@ struct test_histogram1d_functor { density_1d (d1d), mean (1.0*num_draws/HIST_DIM1D*3) { - printf ("Mean: %e\n", mean); } KOKKOS_INLINE_FUNCTION void @@ -295,7 
+295,7 @@ struct test_random_scalar { parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); - double tolerance = 2.0*sqrt(1.0/num_draws); + double tolerance = 1.6*sqrt(1.0/num_draws); double mean_expect = 0.5*Kokkos::rand::max(); double variance_expect = 1.0/3.0*mean_expect*mean_expect; double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; @@ -303,10 +303,10 @@ struct test_random_scalar { double covariance_eps = result.covariance/num_draws/2/variance_expect; pass_mean = ((-tolerance < mean_eps) && ( tolerance > mean_eps)) ? 1:0; - pass_var = ((-tolerance < variance_eps) && - ( tolerance > variance_eps)) ? 1:0; - pass_covar = ((-1.4*tolerance < covariance_eps) && - ( 1.4*tolerance > covariance_eps)) ? 1:0; + pass_var = ((-1.5*tolerance < variance_eps) && + ( 1.5*tolerance > variance_eps)) ? 1:0; + pass_covar = ((-2.0*tolerance < covariance_eps) && + ( 2.0*tolerance > covariance_eps)) ? 1:0; cerr << "Pass: " << pass_mean << " " << pass_var << " " << mean_eps @@ -328,12 +328,12 @@ struct test_random_scalar { double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; - pass_hist1d_mean = ((-tolerance < mean_eps) && - ( tolerance > mean_eps)) ? 1:0; - pass_hist1d_var = ((-tolerance < variance_eps) && - ( tolerance > variance_eps)) ? 1:0; - pass_hist1d_covar = ((-tolerance < covariance_eps) && - ( tolerance > covariance_eps)) ? 1:0; + pass_hist1d_mean = ((-0.0001 < mean_eps) && + ( 0.0001 > mean_eps)) ? 1:0; + pass_hist1d_var = ((-0.07 < variance_eps) && + ( 0.07 > variance_eps)) ? 1:0; + pass_hist1d_covar = ((-0.06 < covariance_eps) && + ( 0.06 > covariance_eps)) ? 
1:0; cerr << "Density 1D: " << mean_eps << " " << variance_eps @@ -363,8 +363,8 @@ struct test_random_scalar { double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; pass_hist3d_mean = ((-tolerance < mean_eps) && ( tolerance > mean_eps)) ? 1:0; - pass_hist3d_var = ((-tolerance < variance_eps) && - ( tolerance > variance_eps)) ? 1:0; + pass_hist3d_var = ((-1.2*tolerance < variance_eps) && + ( 1.2*tolerance > variance_eps)) ? 1:0; pass_hist3d_covar = ((-tolerance < covariance_eps) && ( tolerance > covariance_eps)) ? 1:0; @@ -386,8 +386,13 @@ void test_random(unsigned int num_draws) typename test_random_functor::type_1d density_1d("D1d"); typename test_random_functor::type_3d density_3d("D3d"); + + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + cerr << "Test Seed:" << ticks << endl; + + RandomGenerator pool(ticks); + cerr << "Test Scalar=int" << endl; - RandomGenerator pool(31891); test_random_scalar test_int(density_1d,density_3d,pool,num_draws); ASSERT_EQ( test_int.pass_mean,1); ASSERT_EQ( test_int.pass_var,1); diff --git a/lib/kokkos/cmake/deps/CUDA.cmake b/lib/kokkos/cmake/deps/CUDA.cmake new file mode 100644 index 0000000000..801c20067b --- /dev/null +++ b/lib/kokkos/cmake/deps/CUDA.cmake @@ -0,0 +1,79 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. 
+# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +# Check for CUDA support + +SET(_CUDA_FAILURE OFF) + +# Have CMake find CUDA +IF(NOT _CUDA_FAILURE) + FIND_PACKAGE(CUDA 3.2) + IF (NOT CUDA_FOUND) + SET(_CUDA_FAILURE ON) + ENDIF() +ENDIF() + +IF(_CUDA_FAILURE) + SET(TPL_ENABLE_CUDA OFF) +ELSEIF(NOT TARGET TPL_LIB_CUDA) + # CUSPARSE.cmake re-includes this file, so only define the CUDA TPL target once + macro(PACKAGE_ADD_CUDA_LIBRARY cuda_target) + TRIBITS_ADD_LIBRARY(${cuda_target} ${ARGN} CUDALIBRARY) + endmacro() + GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) + GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUDA) +ENDIF() diff --git a/lib/kokkos/cmake/deps/CUSPARSE.cmake b/lib/kokkos/cmake/deps/CUSPARSE.cmake new file mode 100644 index 0000000000..205f5e2a98 --- /dev/null +++ b/lib/kokkos/cmake/deps/CUSPARSE.cmake @@ -0,0 +1,64 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2.
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. 
+# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +include(${TRIBITS_DEPS_DIR}/CUDA.cmake) + +IF (TPL_ENABLE_CUDA) + GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) + GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +ENDIF() + diff --git a/lib/kokkos/cmake/deps/HWLOC.cmake b/lib/kokkos/cmake/deps/HWLOC.cmake new file mode 100644 index 0000000000..275abd3a5d --- /dev/null +++ b/lib/kokkos/cmake/deps/HWLOC.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Hardware locality detection and control library. +# +# Acquisition information: +# Date checked: November 2011 +# Checked by: H. Carter Edwards +# Source: http://www.open-mpi.org/projects/hwloc/ +# Version: 1.3 +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC + REQUIRED_HEADERS hwloc.h + REQUIRED_LIBS_NAMES "hwloc" + ) diff --git a/lib/kokkos/cmake/deps/Pthread.cmake b/lib/kokkos/cmake/deps/Pthread.cmake new file mode 100644 index 0000000000..46d0a939ca --- /dev/null +++ b/lib/kokkos/cmake/deps/Pthread.cmake @@ -0,0 +1,83 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +SET(USE_THREADS FALSE) + +IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES) + # Use CMake's Thread finder since it is a bit smarter in determining + # whether pthreads is already built into the compiler and doesn't need + # a library to link. 
+ FIND_PACKAGE(Threads) + #If Threads found a copy of pthreads make sure it is one of the cases the tribits + #tpl system cannot handle. + IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread") + SET(USE_THREADS TRUE) + ENDIF() + ENDIF() +ENDIF() + +IF(USE_THREADS) + SET(TPL_Pthread_INCLUDE_DIRS "") + SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + SET(TPL_Pthread_LIBRARY_DIRS "") + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(Pthread) +ELSE() + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + REQUIRED_HEADERS pthread.h + REQUIRED_LIBS_NAMES pthread + ) +ENDIF() diff --git a/lib/kokkos/cmake/deps/QTHREAD.cmake b/lib/kokkos/cmake/deps/QTHREAD.cmake new file mode 100644 index 0000000000..994b72b200 --- /dev/null +++ b/lib/kokkos/cmake/deps/QTHREAD.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Qthreads lightweight user-level threading library. +# +# Acquisition information: +# Date checked: July 2014 +# Checked by: H. Carter Edwards +# Source: https://code.google.com/p/qthreads +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD + REQUIRED_HEADERS qthread.h + REQUIRED_LIBS_NAMES "qthread" + ) + diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake new file mode 100644 index 0000000000..34cd216f81 --- /dev/null +++ b/lib/kokkos/cmake/tribits.cmake @@ -0,0 +1,485 @@ +INCLUDE(CMakeParseArguments) +INCLUDE(CTest) + +FUNCTION(ASSERT_DEFINED VARS) + FOREACH(VAR ${VARS}) + IF(NOT DEFINED ${VAR}) + MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") + ENDIF() + ENDFOREACH() +ENDFUNCTION() + +MACRO(GLOBAL_SET VARNAME) + SET(${VARNAME} ${ARGN} CACHE INTERNAL "") +ENDMACRO() + +MACRO(PREPEND_GLOBAL_SET VARNAME) + ASSERT_DEFINED(${VARNAME}) + GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) +ENDMACRO() + +FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME) + ASSERT_DEFINED(${VARNAME}) + IF (${VARNAME}) + SET(TMP ${${VARNAME}}) + LIST(REMOVE_DUPLICATES TMP) + GLOBAL_SET(${VARNAME} ${TMP}) + ENDIF() +ENDFUNCTION() + +MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE) + MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'") + SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) + IF(NOT "${MACRO_DEFINE_NAME}" STREQUAL "") + IF(${USER_OPTION_NAME}) + GLOBAL_SET(${MACRO_DEFINE_NAME} ON) + ELSE() + GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) + ENDIF() + ENDIF() +ENDMACRO() + +FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) + + # Configure the file + CONFIGURE_FILE( + ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in
${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} + ) + +ENDFUNCTION() + +MACRO(TRIBITS_ADD_DEBUG_OPTION) + TRIBITS_ADD_OPTION_AND_DEFINE( + ${PROJECT_NAME}_ENABLE_DEBUG + HAVE_${PROJECT_NAME_UC}_DEBUG + "Enable a host of runtime debug checking." + OFF + ) +ENDMACRO() + + +MACRO(TRIBITS_ADD_TEST_DIRECTORIES) + FOREACH(TEST_DIR ${ARGN}) + ADD_SUBDIRECTORY(${TEST_DIR}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES) + + IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) + FOREACH(EXAMPLE_DIR ${ARGN}) + ADD_SUBDIRECTORY(${EXAMPLE_DIR}) + ENDFOREACH() + ENDIF() + +ENDMACRO() + +MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT) + SET(PROP_VALUES) + FOREACH(TARGET_X ${ARGN}) + LIST(APPEND PROP_VALUES "$<TARGET_PROPERTY:${TARGET_X},${PROP_IN}>") + ENDFOREACH() + SET_TARGET_PROPERTIES(${TARGET_NAME} PROPERTIES ${PROP_OUT} "${PROP_VALUES}") +ENDMACRO() + +MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) + FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) + SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) +ENDMACRO() + +# Older versions of CMake do not make include directories transitive +MACRO(TARGET_LINK_AND_INCLUDE_LIBRARIES TARGET_NAME) + TARGET_LINK_LIBRARIES(${TARGET_NAME} LINK_PUBLIC ${ARGN}) + FOREACH(DEP_LIB ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $<TARGET_PROPERTY:${DEP_LIB},INTERFACE_INCLUDE_DIRECTORIES>) + TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $<TARGET_PROPERTY:${DEP_LIB},INCLUDE_DIRECTORIES>) + ENDFOREACH() +ENDMACRO() + +FUNCTION(TRIBITS_ADD_LIBRARY LIBRARY_NAME) + + SET(options STATIC SHARED TESTONLY NO_INSTALL_LIB_OR_HEADERS CUDALIBRARY) + SET(oneValueArgs) + SET(multiValueArgs HEADERS HEADERS_INSTALL_SUBDIR NOINSTALLHEADERS SOURCES DEPLIBS IMPORTEDLIBS DEFINES ADDED_LIB_TARGET_NAME_OUT) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + + # Local
variable to hold all of the libraries that will be directly linked + # to this library. + SET(LINK_LIBS ${${PACKAGE_NAME}_DEPS}) + + # Add dependent libraries passed directly in + + IF (PARSE_IMPORTEDLIBS) + LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) + ENDIF() + + IF (PARSE_DEPLIBS) + LIST(APPEND LINK_LIBS ${PARSE_DEPLIBS}) + ENDIF() + + # Add the library and all the dependencies + + IF (PARSE_DEFINES) + ADD_DEFINITIONS(${PARSE_DEFINES}) + ENDIF() + + IF (PARSE_STATIC) + SET(STATIC_KEYWORD "STATIC") + ELSE() + SET(STATIC_KEYWORD) + ENDIF() + + IF (PARSE_SHARED) + SET(SHARED_KEYWORD "SHARED") + ELSE() + SET(SHARED_KEYWORD) + ENDIF() + + IF (PARSE_TESTONLY) + SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") + ELSE() + SET(EXCLUDE_FROM_ALL_KEYWORD) + ENDIF() + IF (NOT PARSE_CUDALIBRARY) + ADD_LIBRARY( + ${LIBRARY_NAME} + ${STATIC_KEYWORD} + ${SHARED_KEYWORD} + ${EXCLUDE_FROM_ALL_KEYWORD} + ${PARSE_HEADERS} + ${PARSE_NOINSTALLHEADERS} + ${PARSE_SOURCES} + ) + ELSE() + CUDA_ADD_LIBRARY( + ${LIBRARY_NAME} + ${PARSE_HEADERS} + ${PARSE_NOINSTALLHEADERS} + ${PARSE_SOURCES} + ) + ENDIF() + + TARGET_LINK_AND_INCLUDE_LIBRARIES(${LIBRARY_NAME} ${LINK_LIBS}) + + IF (NOT PARSE_TESTONLY AND NOT PARSE_NO_INSTALL_LIB_OR_HEADERS) + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT ${PROJECT_NAME} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + FILES ${PARSE_HEADERS} + EXPORT ${PROJECT_NAME} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + DIRECTORY ${PARSE_HEADERS_INSTALL_SUBDIR} + EXPORT ${PROJECT_NAME} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + + ENDIF() + + IF (NOT PARSE_TESTONLY) + PREPEND_GLOBAL_SET(${PACKAGE_NAME}_LIBS ${LIBRARY_NAME}) + REMOVE_GLOBAL_DUPLICATES(${PACKAGE_NAME}_LIBS) + ENDIF() + +ENDFUNCTION() + +FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME) + + SET(options NOEXEPREFIX NOEXESUFFIX ADD_DIR_TO_NAME INSTALLABLE TESTONLY) + SET(oneValueArgs ADDED_EXE_TARGET_NAME_OUT)
+ SET(multiValueArgs SOURCES CATEGORIES HOST XHOST HOSTTYPE XHOSTTYPE DIRECTORY TESTONLYLIBS IMPORTEDLIBS DEPLIBS COMM LINKER_LANGUAGE TARGET_DEFINES DEFINES) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + SET(LINK_LIBS PACKAGE_${PACKAGE_NAME}) + + IF (PARSE_TESTONLYLIBS) + LIST(APPEND LINK_LIBS ${PARSE_TESTONLYLIBS}) + ENDIF() + + IF (PARSE_IMPORTEDLIBS) + LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) + ENDIF() + + SET (EXE_SOURCES) + IF(PARSE_DIRECTORY) + FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) + IF(IS_ABSOLUTE ${SOURCE_FILE}) + SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) + ELSE() + SET (EXE_SOURCES ${EXE_SOURCES} ${PARSE_DIRECTORY}/${SOURCE_FILE}) + ENDIF() + ENDFOREACH( ) + ELSE() + FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) + SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) + ENDFOREACH( ) + ENDIF() + + SET(EXE_BINARY_NAME ${EXE_NAME}) + IF(DEFINED PACKAGE_NAME AND NOT PARSE_NOEXEPREFIX) + SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME}) + ENDIF() + + IF (PARSE_TESTONLY) + SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") + ELSE() + SET(EXCLUDE_FROM_ALL_KEYWORD) + ENDIF() + ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES}) + + IF (PARSE_TARGET_DEFINES) + TARGET_COMPILE_DEFINITIONS(${EXE_BINARY_NAME} PUBLIC ${PARSE_TARGET_DEFINES}) + ENDIF() + + TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS}) + + IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) + SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${EXE_BINARY_NAME} PARENT_SCOPE) + ENDIF() + + IF(PARSE_INSTALLABLE) + INSTALL( + TARGETS ${EXE_BINARY_NAME} + EXPORT ${PROJECT_NAME} + DESTINATION bin + ) + ENDIF() +ENDFUNCTION() + +ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) + +FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) + + SET(options STANDARD_PASS_OUTPUT WILL_FAIL) + SET(oneValueArgs PASS_REGULAR_EXPRESSION FAIL_REGULAR_EXPRESSION ENVIRONMENT TIMEOUT CATEGORIES ADDED_TESTS_NAMES_OUT
ADDED_EXE_TARGET_NAME_OUT) + SET(multiValueArgs) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + TRIBITS_ADD_EXECUTABLE(${EXE_NAME} TESTONLY ADDED_EXE_TARGET_NAME_OUT TEST_NAME ${PARSE_UNPARSED_ARGUMENTS}) + + IF(WIN32) + ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${TEST_NAME}${CMAKE_EXECUTABLE_SUFFIX}) + ELSE() + ADD_TEST(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) + ENDIF() + ADD_DEPENDENCIES(check ${TEST_NAME}) + + IF(PARSE_FAIL_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${PARSE_FAIL_REGULAR_EXPRESSION}) + ENDIF() + + IF(PARSE_PASS_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${PARSE_PASS_REGULAR_EXPRESSION}) + ENDIF() + + IF(PARSE_WILL_FAIL) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${PARSE_WILL_FAIL}) + ENDIF() + + IF(PARSE_ADDED_TESTS_NAMES_OUT) + SET(${PARSE_ADDED_TESTS_NAMES_OUT} ${TEST_NAME} PARENT_SCOPE) + ENDIF() + + IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) + SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${TEST_NAME} PARENT_SCOPE) + ENDIF() + +ENDFUNCTION() + +MACRO(TIBITS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) + ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) + TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) +ENDMACRO() + +FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) + + SET(options MUST_FIND_ALL_LIBS MUST_FIND_ALL_HEADERS NO_PRINT_ENABLE_SUCCESS_FAIL) + SET(oneValueArgs) + SET(multiValueArgs REQUIRED_HEADERS REQUIRED_LIBS_NAMES) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) + IF (PARSE_REQUIRED_LIBS_NAMES) + FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) + IF(NOT TPL_${TPL_NAME}_LIBRARIES) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) 
+ ENDIF() + ENDIF() + IF (PARSE_REQUIRED_HEADERS) + FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) + IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + + + IF (_${TPL_NAME}_ENABLE_SUCCESS) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) + ENDIF() + +ENDFUNCTION() + +MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE) + GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE) + INCLUDE("${TPL_FILE}") + IF(TARGET TPL_LIB_${TPL_NAME}) + MESSAGE(STATUS "Found tpl library: ${TPL_NAME}") + SET(TPL_ENABLE_${TPL_NAME} TRUE) + ELSE() + MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}") + SET(TPL_ENABLE_${TPL_NAME} FALSE) + ENDIF() +ENDMACRO() + +MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) + IF(TYPE STREQUAL "REQUIRED") + SET(REQUIRED TRUE) + ELSE() + SET(REQUIRED FALSE) + ENDIF() + IF(TARGET ${TARGET_NAME}) + PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) + ELSE() + IF(REQUIRED) + MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") + ENDIF() + ENDIF() +ENDMACRO() + +MACRO(TRIBITS_APPEND_PACKAGE_DEPS DEP_LIST TYPE) + FOREACH(DEP ${ARGN}) + PREPEND_GLOBAL_SET(${DEP_LIST} PACKAGE_${DEP}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_APPEND_TPLS_DEPS DEP_LIST TYPE) + FOREACH(DEP ${ARGN}) + PREPEND_TARGET_SET(${DEP_LIST} TPL_LIB_${DEP} ${TYPE}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_ENABLE_TPLS) + FOREACH(TPL ${ARGN}) + IF(TARGET ${TPL}) + GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} TRUE) + ELSE() + GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} FALSE) + ENDIF() + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_PACKAGE_DEFINE_DEPENDENCIES) + + SET(options) + SET(oneValueArgs) + SET(multiValueArgs + LIB_REQUIRED_PACKAGES + LIB_OPTIONAL_PACKAGES + TEST_REQUIRED_PACKAGES + TEST_OPTIONAL_PACKAGES + LIB_REQUIRED_TPLS + LIB_OPTIONAL_TPLS + TEST_REQUIRED_TPLS + TEST_OPTIONAL_TPLS + REGRESSION_EMAIL_LIST + SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS + ) + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" 
"${multiValueArgs}" ${ARGN}) + + GLOBAL_SET(${PACKAGE_NAME}_DEPS "") + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_PACKAGES}) + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_PACKAGES}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_TPLS}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_TPLS}) + + GLOBAL_SET(${PACKAGE_NAME}_TEST_DEPS "") + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_PACKAGES}) + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_PACKAGES}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_TPLS}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_TPLS}) + + TRIBITS_ENABLE_TPLS(${PARSE_LIB_REQUIRED_TPLS} ${PARSE_LIB_OPTIONAL_TPLS} ${PARSE_TEST_REQUIRED_TPLS} ${PARSE_TEST_OPTIONAL_TPLS}) + +ENDMACRO() + +MACRO(TRIBITS_SUBPACKAGE NAME) + SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) + SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + + ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) + + GLOBAL_SET(${PACKAGE_NAME}_LIBS "") + + INCLUDE(${PACKAGE_SOURCE_DIR}/cmake/Dependencies.cmake) + +ENDMACRO(TRIBITS_SUBPACKAGE) + +MACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) + TARGET_LINK_AND_INCLUDE_LIBRARIES(PACKAGE_${PACKAGE_NAME} ${${PACKAGE_NAME}_LIBS}) +ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) + +MACRO(TRIBITS_PACKAGE_DECL NAME) + + PROJECT(${NAME}) + STRING(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC) + SET(PACKAGE_NAME ${PROJECT_NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + + SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") + FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") + FOREACH(TPL_FILE ${TPLS_FILES}) + TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) + ENDFOREACH() + +ENDMACRO() + + 
+MACRO(TRIBITS_PROCESS_SUBPACKAGES) + FILE(GLOB SUBPACKAGES RELATIVE ${CMAKE_SOURCE_DIR} */cmake/Dependencies.cmake) + FOREACH(SUBPACKAGE ${SUBPACKAGES}) + GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY) + GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY) + ADD_SUBDIRECTORY(${SUBPACKAGE_DIR}) + ENDFOREACH() +ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES) + +MACRO(TRIBITS_PACKAGE_DEF) +ENDMACRO(TRIBITS_PACKAGE_DEF) + +MACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) +ENDMACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) + +MACRO(TRIBITS_EXCLUDE_FILES) +ENDMACRO(TRIBITS_EXCLUDE_FILES) + +MACRO(TRIBITS_PACKAGE_POSTPROCESS) +ENDMACRO(TRIBITS_PACKAGE_POSTPROCESS) + diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt new file mode 100644 index 0000000000..9f56f2fd48 --- /dev/null +++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt @@ -0,0 +1,153 @@ +// -------------------------------------------------------------------------------- // + +The following steps are for workstations/servers with the SEMS environment installed. + +// -------------------------------------------------------------------------------- // +Summary: + +- Step 1: Rigorous testing of Kokkos' develop branch for each backend (Serial, OpenMP, Threads, Cuda) with all supported compilers. + +- Step 2: Snapshot Kokkos' develop branch into current Trilinos develop branch. + +- Step 3: Build and test Trilinos with combinations of compilers, types, backends. + +- Step 4: Promote Kokkos develop branch to master if the snapshot does not cause any new tests to fail; else track/fix causes of new failures. + +- Step 5: Snapshot Kokkos tagged master branch into Trilinos and push Trilinos. +// -------------------------------------------------------------------------------- // + + +// -------------------------------------------------------------------------------- // + +Step 1: + 1.1. 
Update the Kokkos develop branch (use the main Kokkos repository, NOT a fork) + + (From kokkos directory): + git fetch --all + git checkout develop + git reset --hard origin/develop + + 1.2. Create a testing directory - here the directory is created within the kokkos directory + + mkdir testing + cd testing + + 1.3. Run the test_all_sandia script; various compiler and build-list options can be specified + + ../config/test_all_sandia + + 1.4. Clean the repository of untracked files + + cd ../ + git clean -df + +// -------------------------------------------------------------------------------- // + +Step 2: + 2.1. Update the Trilinos develop branch + + (From Trilinos directory): + git checkout develop + git fetch --all + git reset --hard origin/develop + git clean -df + + 2.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both the Trilinos and Kokkos repositories be clean (no untracked or modified files) + + module load python/2.7.9 + python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages + +// -------------------------------------------------------------------------------- // + +Step 3: + 3.1. 
Build and test Trilinos with 3 different configurations; a configure-all script is provided in Trilinos and should be modified to test each of the following 3 configurations with the appropriate environment variable(s): + + - GCC/4.7.2-OpenMP/Complex + Run tests with the following environment variable: + + export OMP_NUM_THREADS=2 + + + - Intel/15.0.2-Serial/NoComplex + + + - GCC/4.8.4/CUDA/7.5.18-Cuda/Serial/NoComplex + Run tests with the following environment variables: + + export CUDA_LAUNCH_BLOCKING=1 + export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 + + + mkdir Build + cd Build + cp TRILINOS_PATH/sampleScripts/Sandia-SEMS/configure-all ./ + ** Set the path to Trilinos appropriately within the configure-all script ** + source $SEMS_MODULE_ROOT/utils/sems-modules-init.sh kokkos + source configure-all + make -k (-k means "keep going" to get past build errors; -j12 can also be specified to run 12 build jobs in parallel, for example) + ctest + + 3.2. Compare the failed test output to the test output on the dashboard (testing.sandia.gov/cdash, select Trilinos); investigate and fix problems if new tests fail after the Kokkos snapshot + +// -------------------------------------------------------------------------------- // + +Step 4: + 4.1. Once all Trilinos tests pass, promote the Kokkos develop branch to master on GitHub + + - DO NOT fast-forward the merge!!!! + + (From kokkos directory): + git checkout master + git fetch --all + # Ensure we are on the current origin/master + git reset --hard origin/master + git merge --no-ff origin/develop + + 4.2. 
Update the tag in kokkos/config/master_history.txt + Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate + Tag format: #.#.## + + # Prepend master_history.txt with + + # tag: #.#.## + # date: mm/dd/yyyy + # master: sha1 + # develop: sha1 + # ----------------------- + + git commit --amend -a + + git tag -a #.#.## + tag: #.#.## + date: mm/dd/yyyy + master: sha1 + develop: sha1 + + git push --follow-tags origin master + +// -------------------------------------------------------------------------------- // + +Step 5: + 5.1. Make sure Trilinos is up-to-date - chances are other changes have been committed since the integration testing process began. If a substantial change has occurred that may be affected by the snapshot, the testing procedure may need to be repeated. + + (From Trilinos directory): + git checkout develop + git fetch --all + git reset --hard origin/develop + git clean -df + + 5.2. Snapshot the Kokkos master branch into Trilinos + + (From kokkos directory): + git fetch --all + git checkout tags/#.#.## + git clean -df + + python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages + + 5.3. Push the updated develop branch of Trilinos to GitHub - congratulations!!! 
+ + (From Trilinos directory): + git push + +// -------------------------------------------------------------------------------- // diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt new file mode 100644 index 0000000000..f2eb674578 --- /dev/null +++ b/lib/kokkos/config/master_history.txt @@ -0,0 +1,3 @@ +tag: 2.01.00 date: 07:21:2016 master: xxxxxxxx develop: fa6dfcc4 +tag: 2.01.06 date: 09:02:2016 master: 9afaa87f develop: 555f1a3a + diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper index d583866191..6093cb61bd 100755 --- a/lib/kokkos/config/nvcc_wrapper +++ b/lib/kokkos/config/nvcc_wrapper @@ -1,17 +1,12 @@ #!/bin/bash # # This shell script (nvcc_wrapper) wraps both the host compiler and -# NVCC, if you are building Trilinos with CUDA enabled. The script -# remedies some differences between the interface of NVCC and that of -# the host compiler, in particular for linking. It also means that -# Trilinos doesn't need separate .cu files; it can just use .cpp -# files. +# NVCC, if you are building legacy C or C++ code with CUDA enabled. +# The script remedies some differences between the interface of NVCC +# and that of the host compiler, in particular for linking. +# It also means that a legacy code doesn't need separate .cu files; +# it can just use .cpp files. # -# Hopefully, at some point, NVIDIA may fix NVCC so as to make this -# script obsolete. For now, this script exists and if you want to -# build Trilinos with CUDA enabled, you must use this script as your -# compiler. - # Default settings: change those according to your machine. For # example, you may have have two different wrappers with either icpc # or g++ as their back-end compiler. 
The defaults can be overwritten @@ -53,6 +48,10 @@ object_files="" # Link objects for the host linker only object_files_xlinker="" +# Shared libraries with version numbers are not handled correctly by NVCC +shared_versioned_libraries_host="" +shared_versioned_libraries="" + # Does the User set the architecture arch_set=0 @@ -76,6 +75,9 @@ first_xcompiler_arg=1 temp_dir=${TMPDIR:-/tmp} +# Check if we have an optimization argument already +optimization_applied=0 + #echo "Arguments: $# $@" while [ $# -gt 0 ] @@ -97,8 +99,17 @@ do *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) cpp_files="$cpp_files $1" ;; + # Ensure we only have one optimization flag because NVCC doesn't allow muliple + -O*) + if [ $optimization_applied -eq 1 ]; then + echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting." + else + shared_args="$shared_args $1" + optimization_applied=1 + fi + ;; #Handle shared args (valid for both nvcc and the host compiler) - -O*|-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) shared_args="$shared_args $1" ;; #Handle shared args that have an argument @@ -107,7 +118,7 @@ do shift ;; #Handle known nvcc args - -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage) + -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) cuda_args="$cuda_args $1" ;; #Handle known nvcc args that have an argument @@ -175,10 +186,15 @@ do object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking - *.so.*|*.dylib) + *.dylib) object_files="$object_files -Xlinker $1" object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; + #Handle shared 
libraries with *.so.* names which nvcc can't do. + *.so.*) + shared_versioned_libraries_host="$shared_versioned_libraries_host $1" + shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" + ;; #All other args are sent to the host compiler *) if [ $first_xcompiler_arg -eq 1 ]; then @@ -204,13 +220,13 @@ if [ $arch_set -ne 1 ]; then fi #Compose compilation command -nvcc_command="nvcc $cuda_args $shared_args $xlinker_args" +nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" if [ $first_xcompiler_arg -eq 0 ]; then nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" fi #Compose host only command -host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args" +host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" #nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' if [ $replace_pragma_ident -eq 1 ]; then diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index add45b77b4..aac036a8f3 100755 --- a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -6,34 +6,36 @@ set -o pipefail +# Determine current machine + +MACHINE="" +HOSTNAME=$(hostname) +if [[ "$HOSTNAME" =~ (white|ride).* ]]; then + MACHINE=white +elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then + MACHINE=bowman +elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name + MACHINE=shepard +elif [ ! 
-z "$SEMS_MODULEFILES_ROOT" ]; then + MACHINE=sems +else + echo "Unrecognized machine" >&2 + exit 1 +fi + GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" +IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" +IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" CUDA_WARNING_FLAGS="" -BASE_MODULE_LIST="//base,hwloc/1.10.1///base" -CUDA_MODULE_LIST="/,gcc/4.7.2/base" - -export OMP_NUM_THREADS=4 - -declare -i NUM_RESULTS_TO_KEEP=7 - -RESULT_ROOT_PREFIX=TestAll - -source /projects/modulefiles/utils/sems-modules-init.sh -source /projects/modulefiles/utils/kokkos-modules-init.sh - -SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd ) - -# -# Handle arguments -# - +# Default. 
Machine specific can override DEBUG=False ARGS="" CUSTOM_BUILD_LIST="" @@ -41,6 +43,107 @@ DRYRUN=False BUILD_ONLY=False declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3 TEST_SCRIPT=False +SKIP_HWLOC=False + +ARCH_FLAG="" + +# +# Machine specific config +# + +if [ "$MACHINE" = "sems" ]; then + source /projects/modulefiles/utils/sems-modules-init.sh + source /projects/modulefiles/utils/kokkos-modules-init.sh + + BASE_MODULE_LIST="//base,hwloc/1.10.1///base" + CUDA_MODULE_LIST="/,gcc/4.7.2/base" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + +elif [ "$MACHINE" = "white" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="/" + IBM_MODULE_LIST="/xl/" + CUDA_MODULE_LIST="/,gcc/4.9.2" + + # Don't do pthread on white + GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ 
$GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + ) + + ARCH_FLAG="--arch=Power8" + NUM_JOBS_TO_RUN_IN_PARALLEL=8 + +elif [ "$MACHINE" = "bowman" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="/compilers/" + + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) + + ARCH_FLAG="--arch=KNL" + NUM_JOBS_TO_RUN_IN_PARALLEL=8 + +elif [ "$MACHINE" = "shepard" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="/compilers/" + + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) + + ARCH_FLAG="--arch=HSW" + NUM_JOBS_TO_RUN_IN_PARALLEL=8 + +else + echo "Unhandled machine $MACHINE" >&2 + exit 1 +fi + +export OMP_NUM_THREADS=4 + +declare -i NUM_RESULTS_TO_KEEP=7 + +RESULT_ROOT_PREFIX=TestAll + +SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd ) + +# +# Handle arguments +# while [[ $# > 0 ]] do @@ -61,6 +164,9 @@ BUILD_ONLY=True --test-script*) TEST_SCRIPT=True ;; +--skip-hwloc*) +SKIP_HWLOC=True +;; --num*) NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" ;; @@ -73,6 +179,7 @@ echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" echo " Defaults to root repo containing this script" echo "--debug: Run tests in debug. 
Defaults to False" echo "--test-script: Test this script, not Kokkos" +echo "--skip-hwloc: Do not do hwloc tests" echo "--num=N: Number of jobs to run in parallel " echo "--dry-run: Just print what would be executed" echo "--build-only: Just do builds, don't run anything" @@ -82,21 +189,16 @@ echo " Valid items:" echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" echo "" + echo "ARGS: list of expressions matching compilers to test" -echo " supported compilers" -echo " gcc/4.7.2" -echo " gcc/4.8.4" -echo " gcc/4.9.2" -echo " gcc/5.1.0" -echo " intel/14.0.4" -echo " intel/15.0.2" -echo " intel/16.0.1" -echo " clang/3.5.2" -echo " clang/3.6.1" -echo " cuda/6.5.14" -echo " cuda/7.0.28" -echo " cuda/7.5.18" +echo " supported compilers sems" +for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + echo " $COMPILER" +done echo "" + echo "Examples:" echo " Run all tests" echo " % test_all_sandia" @@ -147,21 +249,6 @@ if [ -z "$ARGS" ]; then ARGS='?' 
fi -# Format: (compiler module-list build-list exe-name warning-flag) -COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - ) - # Process args to figure out which compilers to test COMPILERS_TO_TEST="" for ARG in $ARGS; do @@ -240,18 +327,19 @@ run_cmd() { fi } -# report_and_log_test_results +# report_and_log_test_results report_and_log_test_result() { # Use sane var names - local success=$1; local desc=$2; local phase=$3; + local success=$1; local desc=$2; local comment=$3; if [ "$success" = "0" ]; then echo " PASSED $desc" - touch $PASSED_DIR/$desc + echo $comment > $PASSED_DIR/$desc else + # For failures, comment should be the name of the phase that failed echo " FAILED $desc" >&2 - echo $phase > $FAILED_DIR/$desc - cat ${desc}.${phase}.log + echo $comment > $FAILED_DIR/$desc + cat ${desc}.${comment}.log fi } @@ -309,6 +397,8 @@ single_build_and_test() { echo " Starting job $desc" + local comment="no_comment" + if [ "$TEST_SCRIPT" = "True" ]; then local rand=$[ 1 + $[ RANDOM % 10 ]] sleep $rand @@ -316,14 +406,19 @@ 
single_build_and_test() { run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } fi else - run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + local -i build_start_time=$(date +%s) run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + local -i build_end_time=$(date +%s) + comment="build_time=$(($build_end_time-$build_start_time))" if [[ "$BUILD_ONLY" == False ]]; then run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } + local -i run_end_time=$(date +%s) + comment="$comment run_time=$(($run_end_time-$build_end_time))" fi fi - report_and_log_test_result 0 $desc + report_and_log_test_result 0 $desc "$comment" return 0 } @@ -374,7 +469,7 @@ build_and_test_all() { run_in_background $compiler $build $BUILD_TYPE # If not cuda, do a hwloc test too - if [[ "$compiler" != cuda* ]]; then + if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then run_in_background $compiler $build "hwloc-$BUILD_TYPE" fi done @@ -401,7 +496,11 @@ wait_summarize_and_exit() { echo "PASSED TESTS" echo "#######################################################" - \ls -1 $PASSED_DIR | sort + local passed_test + for passed_test in $(\ls -1 $PASSED_DIR | sort) + do + echo $passed_test $(cat $PASSED_DIR/$passed_test) + done echo "#######################################################" echo "FAILED TESTS" @@ -409,7 +508,7 @@ wait_summarize_and_exit() { local failed_test local -i rv=0 - for failed_test in $(\ls -1 
$FAILED_DIR) + for failed_test in $(\ls -1 $FAILED_DIR | sort) do echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" rv=$rv+1 diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt index 6b57802935..726d403452 100644 --- a/lib/kokkos/containers/performance_tests/CMakeLists.txt +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -16,11 +16,22 @@ IF(Kokkos_ENABLE_OpenMP) LIST( APPEND SOURCES TestOpenMP.cpp) ENDIF() -TRIBITS_ADD_EXECUTABLE_AND_TEST( - PerformanceTest +# Per #374, we always want to build this test, but we only want to run +# it as a PERFORMANCE test. That's why we separate building the test +# from running the test. + +TRIBITS_ADD_EXECUTABLE( + PerfTestExec SOURCES ${SOURCES} COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ) + +TRIBITS_ADD_TEST( + PerformanceTest + NAME PerfTestExec + COMM serial mpi + NUM_MPI_PROCS 1 + CATEGORIES PERFORMANCE + FAIL_REGULAR_EXPRESSION " FAILED " + ) diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp index aee262de93..8183adaa60 100644 --- a/lib/kokkos/containers/performance_tests/TestCuda.cpp +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -54,6 +54,8 @@ #if defined( KOKKOS_HAVE_CUDA ) +#include + #include #include @@ -77,6 +79,13 @@ protected: } }; +TEST_F( cuda, dynrankview_perf ) +{ + std::cout << "Cuda" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 4096 ); +} + TEST_F( cuda, global_2_local) { std::cout << "Cuda" << std::endl; diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp new file mode 100644 index 0000000000..aab6e6988f --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -0,0 +1,265 @@ + +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_DYNRANKVIEW_HPP +#define KOKKOS_TEST_DYNRANKVIEW_HPP + +#include +#include +#include + +#include + +// Compare performance of DynRankView to View, specific focus on the parenthesis operators + +namespace Performance { + +//View functor +template +struct InitViewFunctor { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + InitViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + + struct SumComputationTest + { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + typedef Kokkos::View outviewtype; + outviewtype _outview; + + KOKKOS_INLINE_FUNCTION + SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _outview(i) += _inview(i,j,k) ; + } + } + } + }; + +}; + +template +struct InitStrideViewFunctor { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + InitStrideViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + +}; + +template +struct InitViewRank7Functor { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + InitViewRank7Functor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for 
(unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3; + } + } + } + +}; + +//DynRankView functor +template +struct InitDynRankViewFunctor { + typedef Kokkos::DynRankView inviewtype; + inviewtype _inview; + + InitDynRankViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + + struct SumComputationTest + { + typedef Kokkos::DynRankView inviewtype; + inviewtype _inview; + + typedef Kokkos::DynRankView outviewtype; + outviewtype _outview; + + KOKKOS_INLINE_FUNCTION + SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _outview(i) += _inview(i,j,k) ; + } + } + } + }; + +}; + + +template +void test_dynrankview_op_perf( const int par_size ) +{ + + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + const size_type dim2 = 900; + const size_type dim3 = 300; + + double elapsed_time_view = 0; + double elapsed_time_compview = 0; + double elapsed_time_strideview = 0; + double elapsed_time_view_rank7 = 0; + double elapsed_time_drview = 0; + double elapsed_time_compdrview = 0; + Kokkos::Timer timer; + { + Kokkos::View testview("testview",par_size,dim2,dim3); + typedef InitViewFunctor FunctorType; + + timer.reset(); + Kokkos::RangePolicy policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testview) ); + DeviceType::fence(); + elapsed_time_view = timer.seconds(); + std::cout << " View time (init only): " << elapsed_time_view << std::endl; + + + timer.reset(); + Kokkos::View sumview("sumview",par_size); + Kokkos::parallel_for( policy 
, typename FunctorType::SumComputationTest(testview, sumview) ); + DeviceType::fence(); + elapsed_time_compview = timer.seconds(); + std::cout << " View sum computation time: " << elapsed_time_view << std::endl; + + + Kokkos::View teststrideview = Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL,Kokkos::ALL); + typedef InitStrideViewFunctor FunctorStrideType; + + timer.reset(); + Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) ); + DeviceType::fence(); + elapsed_time_strideview = timer.seconds(); + std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; + } + { + Kokkos::View testview("testview",par_size,dim2,dim3,1,1,1,1); + typedef InitViewRank7Functor FunctorType; + + timer.reset(); + Kokkos::RangePolicy policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testview) ); + DeviceType::fence(); + elapsed_time_view_rank7 = timer.seconds(); + std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; + } + { + Kokkos::DynRankView testdrview("testdrview",par_size,dim2,dim3); + typedef InitDynRankViewFunctor FunctorType; + + timer.reset(); + Kokkos::RangePolicy policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testdrview) ); + DeviceType::fence(); + elapsed_time_drview = timer.seconds(); + std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl; + + timer.reset(); + Kokkos::DynRankView sumview("sumview",par_size); + Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) ); + DeviceType::fence(); + elapsed_time_compdrview = timer.seconds(); + std::cout << " DynRankView sum computation time: " << elapsed_time_compdrview << std::endl; + + } + + std::cout << " Ratio of View to DynRankView time: " << elapsed_time_view / elapsed_time_drview << std::endl; //expect < 1 + std::cout << " Ratio of View to DynRankView sum computation time: " << elapsed_time_compview / elapsed_time_compdrview << std::endl; 
//expect < 1 + std::cout << " Ratio of View to View Rank7 time: " << elapsed_time_view / elapsed_time_view_rank7 << std::endl; //expect < 1 + std::cout << " Ratio of StrideView to DynRankView time: " << elapsed_time_strideview / elapsed_time_drview << std::endl; //expect < 1 + std::cout << " Ratio of DynRankView to View Rank7 time: " << elapsed_time_drview / elapsed_time_view_rank7 << std::endl; //expect ? + + timer.reset(); + +} //end test_dynrankview + + +} //end Performance +#endif diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp index fb70b8fe2e..66f1fbf092 100644 --- a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -178,7 +178,7 @@ void test_global_to_local_ids(unsigned num_ids) std::cout << num_ids << ", "; double elasped_time = 0; - Kokkos::Impl::Timer timer; + Kokkos::Timer timer; local_id_view local_2_global("local_ids", num_ids); global_id_view global_2_local((3u*num_ids)/2u); diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp index 82a9311df7..da74d32ac1 100644 --- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -50,6 +50,8 @@ #include #include +#include + #include #include #include @@ -91,6 +93,13 @@ protected: } }; +TEST_F( openmp, dynrankview_perf ) +{ + std::cout << "OpenMP" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 8192 ); +} + TEST_F( openmp, global_2_local) { std::cout << "OpenMP" << std::endl; diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp index 04d9dc0c18..4179b7de4c 100644 --- a/lib/kokkos/containers/performance_tests/TestThreads.cpp +++ 
b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -52,6 +52,8 @@ #include #include +#include + #include #include #include @@ -85,6 +87,13 @@ protected: } }; +TEST_F( threads, dynrankview_perf ) +{ + std::cout << "Threads" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 8192 ); +} + TEST_F( threads, global_2_local) { std::cout << "Threads" << std::endl; diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp index 975800229c..71d1182cbe 100644 --- a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -80,7 +80,7 @@ struct UnorderedMapTest , map(capacity) , histogram(map.get_histogram()) { - Kokkos::Impl::Timer wall_clock ; + Kokkos::Timer wall_clock ; wall_clock.reset(); value_type v = {}; @@ -228,7 +228,7 @@ void run_performance_tests(std::string const & base_file_name) distance_out << "\b\b\b " << std::endl; block_distance_out << "\b\b\b " << std::endl; - Kokkos::Impl::Timer wall_clock ; + Kokkos::Timer wall_clock ; for (int i=0; i < num_collisions ; ++i) { wall_clock.reset(); std::cout << "Collisions: " << collisions[i] << std::endl; diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index 0fc722c140..f72277700a 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -52,6 +52,12 @@ * 2. Max rank of a DynRankView is 7 * 3. subview name is subdynrankview * 4. Every subdynrankview is returned with LayoutStride + * + * NEW: Redesigned DynRankView + * 5. subview function name now available + * 6. Copy and Copy-Assign View to DynRankView + * 7. deep_copy between Views and DynRankViews + * 8. 
rank( view ); returns the rank of View or DynRankView */ #ifndef KOKKOS_DYNRANKVIEW_HPP @@ -64,11 +70,16 @@ namespace Kokkos { namespace Experimental { +template< typename DataType , class ... Properties > +class DynRankView; //forward declare + namespace Impl { template struct DynRankDimTraits { + enum : size_t{unspecified = ~size_t(0)}; + // Compute the rank of the view from the nonzero dimension arguments. KOKKOS_INLINE_FUNCTION static size_t computeRank( const size_t N0 @@ -81,13 +92,13 @@ struct DynRankDimTraits { , const size_t N7 ) { return - ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0 && N2 == 0 && N1 == 0 && N0 == 0) ? 0 - : ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0 && N2 == 0 && N1 == 0) ? 1 - : ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0 && N2 == 0) ? 2 - : ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0) ? 3 - : ( (N6 == 0 && N5 == 0 && N4 == 0) ? 4 - : ( (N6 == 0 && N5 == 0) ? 5 - : ( (N6 == 0) ? 6 + ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified && N0 == unspecified) ? 0 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified) ? 1 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified) ? 2 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified) ? 3 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified) ? 4 + : ( (N6 == unspecified && N5 == unspecified) ? 5 + : ( (N6 == unspecified) ? 6 : 7 ) ) ) ) ) ) ); } @@ -112,14 +123,14 @@ struct DynRankDimTraits { KOKKOS_INLINE_FUNCTION static typename std::enable_if< (std::is_same::value || std::is_same::value) , Layout >::type createLayout( const Layout& layout ) { - return Layout( layout.dimension[0] != 0 ? layout.dimension[0] : 1 - , layout.dimension[1] != 0 ? layout.dimension[1] : 1 - , layout.dimension[2] != 0 ? 
layout.dimension[2] : 1 - , layout.dimension[3] != 0 ? layout.dimension[3] : 1 - , layout.dimension[4] != 0 ? layout.dimension[4] : 1 - , layout.dimension[5] != 0 ? layout.dimension[5] : 1 - , layout.dimension[6] != 0 ? layout.dimension[6] : 1 - , layout.dimension[7] != 0 ? layout.dimension[7] : 1 + return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 + , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 + , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 + , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 + , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 + , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 + , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 + , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 ); } @@ -128,21 +139,21 @@ struct DynRankDimTraits { KOKKOS_INLINE_FUNCTION static typename std::enable_if< (std::is_same::value) , Layout>::type createLayout( const Layout& layout ) { - return Layout( layout.dimension[0] != 0 ? layout.dimension[0] : 1 + return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 , layout.stride[0] - , layout.dimension[1] != 0 ? layout.dimension[1] : 1 + , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 , layout.stride[1] - , layout.dimension[2] != 0 ? layout.dimension[2] : 1 + , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 , layout.stride[2] - , layout.dimension[3] != 0 ? layout.dimension[3] : 1 + , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 , layout.stride[3] - , layout.dimension[4] != 0 ? layout.dimension[4] : 1 + , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 , layout.stride[4] - , layout.dimension[5] != 0 ? layout.dimension[5] : 1 + , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 , layout.stride[5] - , layout.dimension[6] != 0 ? layout.dimension[6] : 1 + , layout.dimension[6] != unspecified ? 
layout.dimension[6] : 1 , layout.stride[6] - , layout.dimension[7] != 0 ? layout.dimension[7] : 1 + , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 , layout.stride[7] ); } @@ -161,17 +172,141 @@ struct DynRankDimTraits { , const size_t N7 ) { return ViewType( arg - , N0 != 0 ? N0 : 1 - , N1 != 0 ? N1 : 1 - , N2 != 0 ? N2 : 1 - , N3 != 0 ? N3 : 1 - , N4 != 0 ? N4 : 1 - , N5 != 0 ? N5 : 1 - , N6 != 0 ? N6 : 1 - , N7 != 0 ? N7 : 1 ); + , N0 != unspecified ? N0 : 1 + , N1 != unspecified ? N1 : 1 + , N2 != unspecified ? N2 : 1 + , N3 != unspecified ? N3 : 1 + , N4 != unspecified ? N4 : 1 + , N5 != unspecified ? N5 : 1 + , N6 != unspecified ? N6 : 1 + , N7 != unspecified ? N7 : 1 ); } }; + // Non-strided Layout + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value || std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) + { + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + , dynrank > 1 ? layout.dimension[1] : ~size_t(0) + , dynrank > 2 ? layout.dimension[2] : ~size_t(0) + , dynrank > 3 ? layout.dimension[3] : ~size_t(0) + , dynrank > 4 ? layout.dimension[4] : ~size_t(0) + , dynrank > 5 ? layout.dimension[5] : ~size_t(0) + , dynrank > 6 ? layout.dimension[6] : ~size_t(0) + , dynrank > 7 ? layout.dimension[7] : ~size_t(0) + ); + } + + // LayoutStride + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) + { + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + , dynrank > 0 ? layout.stride[0] : (0) + , dynrank > 1 ? layout.dimension[1] : ~size_t(0) + , dynrank > 1 ? layout.stride[1] : (0) + , dynrank > 2 ? layout.dimension[2] : ~size_t(0) + , dynrank > 2 ? layout.stride[2] : (0) + , dynrank > 3 ? layout.dimension[3] : ~size_t(0) + , dynrank > 3 ? layout.stride[3] : (0) + , dynrank > 4 ? 
layout.dimension[4] : ~size_t(0) + , dynrank > 4 ? layout.stride[4] : (0) + , dynrank > 5 ? layout.dimension[5] : ~size_t(0) + , dynrank > 5 ? layout.stride[5] : (0) + , dynrank > 6 ? layout.dimension[6] : ~size_t(0) + , dynrank > 6 ? layout.stride[6] : (0) + , dynrank > 7 ? layout.dimension[7] : ~size_t(0) + , dynrank > 7 ? layout.stride[7] : (0) + ); + } + + template < typename DynRankViewType , typename iType > + KOKKOS_INLINE_FUNCTION void verify_dynrankview_rank ( iType N , const DynRankViewType &drv ) + { + if ( static_cast(drv.rank()) > N ) + { + Kokkos::abort( "Need at least rank arguments to the operator()" ); + } + } + + +/** \brief Assign compatible default mappings */ +struct ViewToDynRankViewTag {}; + +template< class DstTraits , class SrcTraits > +class ViewMapping< DstTraits , SrcTraits , + typename std::enable_if<( + std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value + && + std::is_same< typename DstTraits::specialize , void >::value + && + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value + || + ( + ( + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value + ) + && + ( + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value + ) + ) + ) + ) , ViewToDynRankViewTag >::type > +{ +private: + + enum { is_assignable_value_type = + std::is_same< typename DstTraits::value_type + , typename SrcTraits::value_type >::value || + std::is_same< typename DstTraits::value_type + , typename SrcTraits::const_value_type >::value }; + + enum { is_assignable_layout = +
std::is_same< typename DstTraits::array_layout + , typename SrcTraits::array_layout >::value || + std::is_same< typename DstTraits::array_layout + , Kokkos::LayoutStride >::value + }; + +public: + + enum { is_assignable = is_assignable_value_type && + is_assignable_layout }; + + typedef ViewMapping< DstTraits , void > DstType ; + typedef ViewMapping< SrcTraits , void > SrcType ; + + template < typename DT , typename ... DP , typename ST , typename ... SP > + KOKKOS_INLINE_FUNCTION + static void assign( Kokkos::Experimental::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src ) + { + static_assert( is_assignable_value_type + , "View assignment must have same value type or const = non-const" ); + + static_assert( is_assignable_layout + , "View assignment must have compatible layout or have rank <= 1" ); + + // Removed dimension checks... + + typedef typename DstType::offset_type dst_offset_type ; + dst.m_map.m_offset = dst_offset_type(std::integral_constant() , src.layout() ); //Check this for integer input1 for padding, etc + dst.m_map.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_handle , src.m_track ); + dst.m_track.assign( src.m_track , DstTraits::is_managed ); + dst.m_rank = src.Rank ; + } +}; + } //end Impl /* \class DynRankView @@ -185,145 +320,228 @@ struct DynRankDimTraits { * 3. subview name is subdynrankview * 4. Every subdynrankview is returned with LayoutStride * + * NEW: Redesigned DynRankView + * 5. subview function name now available + * 6. Copy and Copy-Assign View to DynRankView + * 7. deep_copy between Views and DynRankViews + * 8. rank( view ); returns the rank of View or DynRankView + * */ +template< class > struct is_dyn_rank_view : public std::false_type {}; + +template< class D, class ... P > +struct is_dyn_rank_view< Kokkos::Experimental::DynRankView > : public std::true_type {}; + + template< typename DataType , class ... 
Properties > -class DynRankView : private View< DataType*******, Properties... > +class DynRankView : public ViewTraits< DataType , Properties ... > { static_assert( !std::is_array::value && !std::is_pointer::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); -public: - using view_type = View< DataType******* , Properties...>; - using reference_type = typename view_type::reference_type; - private: template < class , class ... > friend class DynRankView ; - template< class , class ... > friend class Impl::ViewMapping ; +// template < class , class ... > friend class Kokkos::Experimental::View ; //unnecessary now... + template < class , class ... > friend class Impl::ViewMapping ; + +public: + typedef ViewTraits< DataType , Properties ... > drvtraits ; + + typedef View< DataType******* , Properties...> view_type ; + + typedef ViewTraits< DataType******* , Properties ... > traits ; + + +private: + typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + track_type m_track ; + map_type m_map ; unsigned m_rank; -public: +public: KOKKOS_INLINE_FUNCTION - view_type & DownCast() const { return static_cast< view_type & > (*this); } + view_type & DownCast() const { return ( view_type & ) (*this); } KOKKOS_INLINE_FUNCTION - const view_type & ConstDownCast() const { return static_cast< const view_type & > (*this); } + const view_type & ConstDownCast() const { return (const view_type & ) (*this); } - typedef ViewTraits< DataType , Properties ... 
> traits ; - - // Data type traits: - typedef typename traits::data_type data_type; - typedef typename traits::const_data_type const_data_type; - typedef typename traits::non_const_data_type non_const_data_type; - - // Compatible array of trivial type traits: - typedef typename traits::scalar_array_type scalar_array_type ; - typedef typename traits::const_scalar_array_type const_scalar_array_type ; - typedef typename traits::non_const_scalar_array_type non_const_scalar_array_type ; - - // Value type traits: - typedef typename traits::value_type value_type ; - typedef typename traits::const_value_type const_value_type ; - typedef typename traits::non_const_value_type non_const_value_type ; - - // Mapping traits: - typedef typename traits::array_layout array_layout ; - typedef typename traits::specialize specialize ; - - // Execution space, memory space, memory access traits, and host mirror space: - typedef typename traits::execution_space execution_space ; - typedef typename traits::memory_space memory_space ; - typedef typename traits::device_type device_type ; - typedef typename traits::memory_traits memory_traits ; - typedef typename traits::host_mirror_space host_mirror_space ; - - typedef typename traits::size_type size_type ; - - using view_type::is_hostspace ; - using view_type::is_managed ; - using view_type::is_random_access ; + //Types below - at least the HostMirror requires the value_type, NOT the rank 7 data_type of the traits /** \brief Compatible view of array of scalar types */ - typedef DynRankView< typename traits::scalar_array_type , - typename traits::array_layout , - typename traits::device_type , - typename traits::memory_traits > + typedef DynRankView< typename drvtraits::scalar_array_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > array_type ; /** \brief Compatible view of const data type */ - typedef DynRankView< typename traits::const_data_type , - typename 
traits::array_layout , - typename traits::device_type , - typename traits::memory_traits > + typedef DynRankView< typename drvtraits::const_data_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > const_type ; /** \brief Compatible view of non-const data type */ - typedef DynRankView< typename traits::non_const_data_type , - typename traits::array_layout , - typename traits::device_type , - typename traits::memory_traits > + typedef DynRankView< typename drvtraits::non_const_data_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > non_const_type ; /** \brief Compatible HostMirror view */ - typedef DynRankView< typename traits::non_const_data_type , - typename traits::array_layout , - typename traits::host_mirror_space > + typedef DynRankView< typename drvtraits::non_const_data_type , + typename drvtraits::array_layout , + typename drvtraits::host_mirror_space > HostMirror ; + //---------------------------------------- // Domain rank and extents +// enum { Rank = map_type::Rank }; //Will be dyn rank of 7 always, keep the enum? + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral::value , size_t >::type + extent( const iType & r ) const + { return m_map.extent(r); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral::value , int >::type + extent_int( const iType & r ) const + { return static_cast(m_map.extent(r)); } + + KOKKOS_INLINE_FUNCTION constexpr + typename traits::array_layout layout() const + { return m_map.layout(); } + + //---------------------------------------- + /* Deprecate all 'dimension' functions in favor of + * ISO/C++ vocabulary 'extent'. 
+ */ + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral::value , size_t >::type + dimension( const iType & r ) const { return extent( r ); } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_map.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_map.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_map.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_map.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_map.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_map.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_map.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_map.dimension_7(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.dimension_0() * + m_map.dimension_1() * + m_map.dimension_2() * + m_map.dimension_3() * + m_map.dimension_4() * + m_map.dimension_5() * + m_map.dimension_6() * + m_map.dimension_7(); } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_map.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); } + + template< typename iType > + 
KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); } + + //---------------------------------------- + // Range span is the span which contains all members. + + typedef typename map_type::reference_type reference_type ; + typedef typename map_type::pointer_type pointer_type ; + + enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); } + // Deprecated, use 'span()' instead + KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { return m_map.span(); } + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_map.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); } + + // Deprecated, use 'span_is_contiguous()' instead + KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { return m_map.span_is_contiguous(); } + // Deprecated, use 'data()' instead + KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { return m_map.data(); } + + //---------------------------------------- + // Allow specializations to query their specialized map + KOKKOS_INLINE_FUNCTION - DynRankView() : view_type() , m_rank(0) {} + const Kokkos::Experimental::Impl::ViewMapping< traits , void > & + implementation_map() const { return m_map ; } + + //---------------------------------------- + +private: + + enum { + is_layout_left = std::is_same< typename traits::array_layout + , Kokkos::LayoutLeft >::value , + + is_layout_right = std::is_same< typename traits::array_layout + , Kokkos::LayoutRight >::value , + + is_layout_stride = std::is_same< typename traits::array_layout + , Kokkos::LayoutStride >::value , + + is_default_map = + std::is_same< typename traits::specialize , void >::value && + ( is_layout_left || is_layout_right || is_layout_stride ) + }; + +// Bounds checking macros +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + +#define
KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \ + Kokkos::Experimental::Impl::verify_dynrankview_rank ( N , *this ) ; \ + Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ; + +#else + +#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); + +#endif + +public: KOKKOS_INLINE_FUNCTION constexpr unsigned rank() const { return m_rank; } - using view_type::extent; - using view_type::extent_int; - using view_type::layout; - using view_type::dimension; - using view_type::size; - using view_type::stride; - - using pointer_type = typename view_type::pointer_type; - using view_type::reference_type_is_lvalue_reference; - using view_type::span; - using view_type::capacity; - using view_type::span_is_contiguous; - using view_type::data; - using view_type::implementation_map; - - using view_type::is_contiguous; - using view_type::ptr_on_device; - - //Deprecated, remove soon (add for test) - using view_type::dimension_0; - using view_type::dimension_1; - using view_type::dimension_2; - using view_type::dimension_3; - using view_type::dimension_4; - using view_type::dimension_5; - using view_type::dimension_6; - using view_type::dimension_7; - using view_type::stride_0; - using view_type::stride_1; - using view_type::stride_2; - using view_type::stride_3; - using view_type::stride_4; - using view_type::stride_5; - using view_type::stride_6; - using view_type::stride_7; //operators () // Rank 0 KOKKOS_INLINE_FUNCTION reference_type operator()() const - { return view_type::operator()(0,0,0,0,0,0,0); } - + { + KOKKOS_VIEW_OPERATOR_VERIFY( 0 , ( implementation_map() ) ) + return implementation_map().reference(); + //return m_map.reference(0,0,0,0,0,0,0); + } + // Rank 1 // This assumes a contiguous 
underlying memory (i.e. no padding, no striding...) template< typename iType > KOKKOS_INLINE_FUNCTION - typename std::enable_if< std::is_same::value && std::is_integral::value, reference_type>::type + typename std::enable_if< std::is_same::value && std::is_integral::value, reference_type>::type operator[](const iType & i0) const { return data()[i0]; @@ -333,59 +551,141 @@ public: // AND a Trilinos/Sacado scalar type ) template< typename iType > KOKKOS_INLINE_FUNCTION - typename std::enable_if< !std::is_same::value && std::is_integral::value, reference_type>::type + typename std::enable_if< !std::is_same::value && std::is_integral::value, reference_type>::type operator[](const iType & i0) const { - auto map = implementation_map(); - - const size_t dim_scalar = map.dimension_scalar(); +// auto map = implementation_map(); + const size_t dim_scalar = m_map.dimension_scalar(); const size_t bytes = this->span() / dim_scalar; - typedef Kokkos::View > tmp_view_type; + typedef Kokkos::View > tmp_view_type; tmp_view_type rankone_view(this->data(), bytes, dim_scalar); return rankone_view(i0); } template< typename iType > KOKKOS_INLINE_FUNCTION - reference_type operator()(const iType & i0 ) const - { return view_type::operator()(i0,0,0,0,0,0,0); } + typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType & i0 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 1 , ( m_map , i0 ) ) + return m_map.reference(i0); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType & i0 ) const + { + return m_map.reference(i0,0,0,0,0,0,0); + } // Rank 2 template< typename iType0 , typename iType1 > KOKKOS_INLINE_FUNCTION - reference_type operator()(const iType0 & i0 , const iType1 & i1 ) const - { return view_type::operator()(i0,i1,0,0,0,0,0); } + typename std::enable_if< (std::is_same::value && 
std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + return m_map.reference(i0,i1); + } + + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + return m_map.reference(i0,i1,0,0,0,0,0); + } // Rank 3 template< typename iType0 , typename iType1 , typename iType2 > KOKKOS_INLINE_FUNCTION - reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { return view_type::operator()(i0,i1,i2,0,0,0,0); } + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + return m_map.reference(i0,i1,i2); + } + + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + return m_map.reference(i0,i1,i2,0,0,0,0); + } // Rank 4 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > KOKKOS_INLINE_FUNCTION - reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { return view_type::operator()(i0,i1,i2,i3,0,0,0); } + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), 
reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + return m_map.reference(i0,i1,i2,i3); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + return m_map.reference(i0,i1,i2,i3,0,0,0); + } // Rank 5 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > KOKKOS_INLINE_FUNCTION - reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { return view_type::operator()(i0,i1,i2,i3,i4,0,0); } + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + return m_map.reference(i0,i1,i2,i3,i4); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + return m_map.reference(i0,i1,i2,i3,i4,0,0); + } // Rank 6 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, 
typename iType4 , typename iType5 > KOKKOS_INLINE_FUNCTION - reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { return view_type::operator()(i0,i1,i2,i3,i4,i5,0); } + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,0); + } // Rank 7 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > KOKKOS_INLINE_FUNCTION - reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { return view_type::operator()(i0,i1,i2,i3,i4,i5,i6); } + typename std::enable_if< (std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 
, const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 7 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 , i6 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + } + +#undef KOKKOS_VIEW_OPERATOR_VERIFY //---------------------------------------- // Standard constructor, destructor, and assignment operators... @@ -394,46 +694,89 @@ public: ~DynRankView() {} KOKKOS_INLINE_FUNCTION - DynRankView( const DynRankView & ) = default ; + DynRankView() : m_track(), m_map(), m_rank() {} //Default ctor KOKKOS_INLINE_FUNCTION - DynRankView( DynRankView && ) = default ; + DynRankView( const DynRankView & rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( const DynRankView & ) = default ; + DynRankView( DynRankView && rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( DynRankView && ) = default ; + DynRankView & operator = ( const DynRankView & rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } //---------------------------------------- // Compatible view copy constructor and assignment // may assign unmanaged from managed. - template< class RT , class ... RP > KOKKOS_INLINE_FUNCTION DynRankView( const DynRankView & rhs ) - : view_type( rhs.ConstDownCast() ) + : m_track( rhs.m_track , traits::is_managed ) + , m_map() , m_rank(rhs.m_rank) - {} + { + typedef typename DynRankView ::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + } template< class RT , class ... 
RP > KOKKOS_INLINE_FUNCTION DynRankView & operator = (const DynRankView & rhs ) - { - view_type::operator = ( rhs.ConstDownCast() ); - m_rank = rhs.rank(); - return *this; - } + { + typedef typename DynRankView ::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + m_track.assign( rhs.m_track , traits::is_managed ); + m_rank = rhs.rank(); + return *this; + } + +// Experimental +// Copy/Assign View to DynRankView + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView( const View & rhs ) + : m_track() + , m_map() + , m_rank( rhs.Rank ) + { + typedef typename View::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( *this , rhs ); + } + + template< class RT , class ... 
RP > + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( const View & rhs ) + { + typedef typename View::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" ); + Mapping::assign( *this , rhs ); + return *this ; + } //---------------------------------------- // Allocation tracking properties - using view_type::use_count; - using view_type::label; + KOKKOS_INLINE_FUNCTION + int use_count() const + { return m_track.use_count(); } + + inline + const std::string label() const + { return m_track.template get_label< typename traits::memory_space >(); } //---------------------------------------- // Allocation according to allocation properties and array layout - + // unused arg_layout dimensions must be set to ~size_t(0) so that rank deduction can properly take place template< class ... P > explicit inline DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop @@ -441,12 +784,77 @@ public: , typename traits::array_layout >::type const & arg_layout ) - : view_type( arg_prop - , Impl::DynRankDimTraits::createLayout(arg_layout) ) + : m_track() + , m_map() , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) ) - {} + { + // Append layout and spaces if not input + typedef Impl::ViewCtorProp< P ... > alloc_prop_input ; -//Wrappers + // use 'std::integral_constant' for non-types + // to avoid duplicate class error. + typedef Impl::ViewCtorProp + < P ... 
+ , typename std::conditional + < alloc_prop_input::has_label + , std::integral_constant + , typename std::string + >::type + , typename std::conditional + < alloc_prop_input::has_memory_space + , std::integral_constant + , typename traits::device_type::memory_space + >::type + , typename std::conditional + < alloc_prop_input::has_execution_space + , std::integral_constant + , typename traits::device_type::execution_space + >::type + > alloc_prop ; + + static_assert( traits::is_managed + , "View allocation constructor requires managed memory" ); + + if ( alloc_prop::initialize && + ! alloc_prop::execution_space::is_initialized() ) { + // If initializing view data then + // the execution space must be initialized. + Kokkos::Impl::throw_runtime_exception("Constructing DynRankView and initializing data with uninitialized execution space"); + } + + // Copy the input allocation properties with possibly defaulted properties + alloc_prop prop( arg_prop ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + // If allocating in CudaUVMSpace must fence before and after + // the allocation to protect against possible concurrent access + // on the CPU and the GPU. + // Fence using the trait's execution space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from using Kokkos::Cuda directly. 
+ if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + Kokkos::Experimental::Impl::SharedAllocationRecord<> * + record = m_map.allocate_shared( prop , Impl::DynRankDimTraits::createLayout(arg_layout) ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + // Setup and initialization complete, start tracking + m_track.assign_allocated_record_to_uninitialized( record ); + } + + + // Wrappers template< class ... P > explicit KOKKOS_INLINE_FUNCTION DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop @@ -454,10 +862,16 @@ public: , typename traits::array_layout >::type const & arg_layout ) - : view_type( arg_prop - , Impl::DynRankDimTraits::createLayout(arg_layout) ) + : m_track() // No memory tracking + , m_map( arg_prop , Impl::DynRankDimTraits::createLayout(arg_layout) ) , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) ) - {} + { + static_assert( + std::is_same< pointer_type + , typename Impl::ViewCtorProp< P... >::pointer_type + >::value , + "Constructing DynRankView to wrap user memory must supply matching pointer type" ); + } //---------------------------------------- //Constructor(s) @@ -468,14 +882,14 @@ public: DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop , typename std::enable_if< ! Impl::ViewCtorProp< P... 
>::has_pointer , size_t - >::type const arg_N0 = 0 - , const size_t arg_N1 = 0 - , const size_t arg_N2 = 0 - , const size_t arg_N3 = 0 - , const size_t arg_N4 = 0 - , const size_t arg_N5 = 0 - , const size_t arg_N6 = 0 - , const size_t arg_N7 = 0 + >::type const arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) ) : DynRankView( arg_prop , typename traits::array_layout @@ -488,14 +902,14 @@ public: DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer , size_t - >::type const arg_N0 = 0 - , const size_t arg_N1 = 0 - , const size_t arg_N2 = 0 - , const size_t arg_N3 = 0 - , const size_t arg_N4 = 0 - , const size_t arg_N5 = 0 - , const size_t arg_N6 = 0 - , const size_t arg_N7 = 0 + >::type const arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) ) : DynRankView( arg_prop , typename traits::array_layout @@ -514,20 +928,20 @@ public: : DynRankView( Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout ) {} - // Allocate label and layout + // Allocate label and layout, must disambiguate from subview constructor template< typename Label > explicit inline DynRankView( const Label & arg_label , typename std::enable_if< Kokkos::Experimental::Impl::is_view_label