diff --git a/lib/kokkos/.gitignore b/lib/kokkos/.gitignore new file mode 100644 index 0000000000..f9d16be155 --- /dev/null +++ b/lib/kokkos/.gitignore @@ -0,0 +1,8 @@ +# Standard ignores +*~ +*.pyc +\#*# +.#* +.*.swp +.cproject +.project diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt new file mode 100644 index 0000000000..f45fc8d9fc --- /dev/null +++ b/lib/kokkos/CMakeLists.txt @@ -0,0 +1,136 @@ + +# +# A) Forward declare the package so that certain options are also defined for +# subpackages +# + +TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) + +#------------------------------------------------------------------------------ +# +# B) Define the common options for Kokkos first so they can be used by +# subpackages as well. +# + +TRIBITS_ADD_DEBUG_OPTION() + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_SIERRA_BUILD + KOKKOS_FOR_SIERRA + "Configure Kokkos for building within the Sierra build system." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda + KOKKOS_HAVE_CUDA + "Enable CUDA support in Kokkos." + "${TPL_ENABLE_CUDA}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda_UVM + KOKKOS_USE_CUDA_UVM + "Enable CUDA Unified Virtual Memory support in Kokkos." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Pthread + KOKKOS_HAVE_PTHREAD + "Enable Pthread support in Kokkos." + OFF + ) + +ASSERT_DEFINED(TPL_ENABLE_Pthread) +IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) + MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.") +ENDIF () + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_OpenMP + KOKKOS_HAVE_OPENMP + "Enable OpenMP support in Kokkos." + "${${PROJECT_NAME}_ENABLE_OpenMP}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_QTHREAD + KOKKOS_HAVE_QTHREAD + "Enable QTHREAD support in Kokkos." 
+ "${TPL_ENABLE_QTHREAD}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_CXX11 + KOKKOS_HAVE_CXX11 + "Enable C++11 support in Kokkos." + "${${PROJECT_NAME}_ENABLE_CXX11}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_HWLOC + KOKKOS_HAVE_HWLOC + "Enable HWLOC support in Kokkos." + "${TPL_ENABLE_HWLOC}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_MPI + KOKKOS_HAVE_MPI + "Enable MPI support in Kokkos." + "${TPL_ENABLE_MPI}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Debug_Bounds_Check + KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + "Enable bounds checking support in Kokkos." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Profiling + KOKKOS_ENABLE_PROFILING_INTERNAL + "Enable KokkosP profiling support for kernel data collections." + "${TPL_ENABLE_DLlib}" + ) + +# placeholder for future device... +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Winthread + KOKKOS_HAVE_WINTHREAD + "Enable Winthread support in Kokkos." + "${TPL_ENABLE_Winthread}" + ) + +# use new/old View +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_USING_DEPRECATED_VIEW + KOKKOS_USING_DEPRECATED_VIEW + "Choose whether to use the old, deprecated Kokkos::View" + OFF + ) + +#------------------------------------------------------------------------------ +# +# C) Process the subpackages for Kokkos +# + +TRIBITS_PROCESS_SUBPACKAGES() + +# +# D) If Kokkos itself is enabled, process the Kokkos package +# + +TRIBITS_PACKAGE_DEF() + +TRIBITS_EXCLUDE_AUTOTOOLS_FILES() + +TRIBITS_EXCLUDE_FILES( + classic/doc + classic/LinAlg/doc/CrsRefactorNotesMay2012 + ) + +TRIBITS_PACKAGE_POSTPROCESS() + diff --git a/lib/kokkos/Copyright.txt b/lib/kokkos/Copyright.txt new file mode 100644 index 0000000000..05980758fa --- /dev/null +++ b/lib/kokkos/Copyright.txt @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER diff --git a/lib/kokkos/HOW_TO_SNAPSHOT b/lib/kokkos/HOW_TO_SNAPSHOT new file mode 100644 index 0000000000..46bfb4167f --- /dev/null +++ b/lib/kokkos/HOW_TO_SNAPSHOT @@ -0,0 +1,73 @@ + +Developers of Kokkos (those who commit modifications to Kokkos) +must maintain the snapshot of Kokkos in the Trilinos repository. + +This file contains instructions for how to +snapshot Kokkos from github.com/kokkos to Trilinos. + +------------------------------------------------------------------------ +*** EVERYTHING GOES RIGHT WORKFLOW *** + +1) Given a 'git clone' of Kokkos and of Trilinos repositories. +1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone. + This path *must* terminate with the directory name 'kokkos'; + e.g., ${HOME}/kokkos . +1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory. + +2) Given that the Kokkos build & test is clean and + changes are committed to the Kokkos clone. + +3) Snapshot the current commit in the Kokkos clone into the Trilinos clone. + This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}: + ${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages + +4) Verify the snapshot commit happened as expected + cd ${TRILINOS}/packages/kokkos + git log -1 --name-only + +5) Modify, build, and test Trilinos with the Kokkos snapshot. + +6) Given that the Trilinos build & test is clean and + changes are committed to the Trilinos clone. + +7) Attempt to push to the Kokkos repository. + If push fails then you must 'remove the Kokkos snapshot' + from your Trilinos clone. + See below. + +8) Attempt to push to the Trilinos repository. + If updating for a failed push requires you to change Kokkos you must + 'remove the Kokkos snapshot' from your Trilinos clone. + See below. 
+ +------------------------------------------------------------------------ +*** WHEN SOMETHING GOES WRONG AND YOU MUST *** +*** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE *** + +1) Query the Trilinos clone commit log. + git log --oneline + +2) Note the hash of the commit to the Trilinos clone + immediately BEFORE the Kokkos snapshot commit. + Copy this for use in the next command. + +3) IF there is more than one outstanding commit then you can remove just the + Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file. + Remove or comment out the Kokkos snapshot commit entry. + git rebase -i <hash> + +4) IF the Kokkos snapshot commit is the one and only + outstanding commit then remove just that commit. + git reset --hard HEAD~1 + +------------------------------------------------------------------------ +*** REGARDING 'snapshot.py' TOOL *** + +The 'snapshot.py' tool is developed and maintained by the +Center for Computing Research (CCR) +Software Engineering, Maintenance, and Support (SEMS) team. + +Contact Brent Perschbacher for questions. + +------------------------------------------------------------------------ + diff --git a/lib/kokkos/LICENSE b/lib/kokkos/LICENSE new file mode 100644 index 0000000000..05980758fa --- /dev/null +++ b/lib/kokkos/LICENSE @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos new file mode 100644 index 0000000000..61d678630f --- /dev/null +++ b/lib/kokkos/Makefile.kokkos @@ -0,0 +1,471 @@ +# Default settings common options + +#LAMMPS specific settings: +KOKKOS_PATH=../../lib/kokkos +CXXFLAGS=$(CCFLAGS) + +#Options: OpenMP,Serial,Pthreads,Cuda +#KOKKOS_DEVICES ?= "OpenMP" +KOKKOS_DEVICES ?= "Pthreads" +#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8,KNL +KOKKOS_ARCH ?= "" +#Options: yes,no +KOKKOS_DEBUG ?= "no" +#Options: hwloc,librt,experimental_memkind +KOKKOS_USE_TPLS ?= "" +#Options: c++11 +KOKKOS_CXX_STANDARD ?= "c++11" +#Options: aggressive_vectorization,disable_profiling +KOKKOS_OPTIONS ?= "aggressive_vectorization" + +#Default settings specific options +#Options: force_uvm,use_ldg,rdc,enable_lambda +KOKKOS_CUDA_OPTIONS ?= "" + +# Check for general settings + +KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l)) +KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l)) + +# Check for external libraries +KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l)) +KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l)) +KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l)) + +# Check for advanced settings +KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) +KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) +KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc 
-l)) +KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l)) +KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) +KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) + +# Check for Kokkos Host Execution Spaces one of which must be on + +KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l)) +KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) +KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) +KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l)) + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 +endif +endif + +KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) +KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) +KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) +KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) +KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l) + +ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -mp +else + ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + # OpenMP is turned on by default in Cray compiler environment + KOKKOS_INTERNAL_OPENMP_FLAG := + else + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_INTERNAL_CXX11_FLAG := --c++11 +else + ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_INTERNAL_CXX11_FLAG := 
-hstd=c++11 + else + KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11 + endif + endif +endif + +# Check for other Execution Spaces +KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) + +# Check for Kokkos Architecture settings + +#Intel based +KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) + +#NVIDIA based +NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper +KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep 
Kepler | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) +endif + +#ARM based +KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l)) + +#IBM based +KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc)) + +#AMD based +KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) + +#Any AVX? +KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) + +# Decide what ISA level we are able to support +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) +KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) +KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc )) + +#Incompatible flags? 
+KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc )) +KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) +# AMDAVX is counted once in the host sum, via KOKKOS_INTERNAL_USE_ARCH_AVX (SNB + AMDAVX); adding it again made KOKKOS_ARCH=AMDAVX alone trip the multi-host error. +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) + $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1) + $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) +endif + +#Generating the list of Flags + +KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src + +# No warnings: +KOKKOS_CXXFLAGS = +# INTEL and CLANG warnings: +#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized +# GCC warnings: +#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered + +KOKKOS_LIBS = -lkokkos -ldl +KOKKOS_LDFLAGS = -L$(shell pwd) +KOKKOS_SRC = +KOKKOS_HEADERS = + +#Generating the KokkosCore_config.h file + +tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) +tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp) +tmp := $(shell date >> KokkosCore_config.tmp) +tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) + + +tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 
1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include + KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib + tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp) +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -G +endif + KOKKOS_CXXFLAGS += -g + KOKKOS_LDFLAGS += -g -ldl + tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_LIBS += -lhwloc + tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) + tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp ) + KOKKOS_LIBS += -lrt +endif + +ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) + 
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include + KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_LIBS += -lmemkind + tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) + tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING 0" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1) + tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -expt-extended-lambda +endif + +#Add Architecture flags + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mcpu=power8 + KOKKOS_LDFLAGS += -mcpu=power8 +endif + +ifeq 
($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX2 + KOKKOS_LDFLAGS += -xCORE-AVX2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is really a GNU compiler + KOKKOS_CXXFLAGS += -march=core-avx2 + KOKKOS_LDFLAGS += -march=core-avx2 + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xMIC-AVX512 + KOKKOS_LDFLAGS += -xMIC-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is really a GNU compiler + KOKKOS_CXXFLAGS += -march=knl + KOKKOS_LDFLAGS += -march=knl + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mmic + KOKKOS_LDFLAGS += -mmic +endif + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_30 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_32 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_35 +endif +ifeq 
($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_37 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_50 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_52 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_53 +endif +endif + +KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) +ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) +KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) +else +KOKKOS_INTERNAL_NEW_CONFIG := 1 +endif + +ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) + tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) +endif + +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) + +KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp) +KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) + 
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_LIBS += -lcudart -lcuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_LIBS += -lpthread + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + KOKKOS_LIBS += -lqthread + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) + else + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) + endif + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) +endif + +#With Cygwin functions such as fdopen and fileno are not defined +#when strict ansi is enabled. strict ansi gets enabled with --std=c++11 +#though. So we hard undefine it here. Not sure if that has any bad side effects +#This is needed for gtest actually, not for Kokkos itself! 
+ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) + KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ +endif + +# Setting up dependencies + +KokkosCore_config.h: + +KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS) + +KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o) +KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ)) + +include $(KOKKOS_PATH)/Makefile.targets + +kokkos-clean: + rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a + +libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) + ar cr libkokkos.a $(KOKKOS_OBJ_LINK) + ranlib libkokkos.a + +KOKKOS_LINK_DEPENDS=libkokkos.a diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets new file mode 100644 index 0000000000..876ae033b7 --- /dev/null +++ b/lib/kokkos/Makefile.targets @@ -0,0 +1,70 @@ +Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp +Kokkos_AllocationTracker.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp +Kokkos_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp +Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp +Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp +Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp +Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp +Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp +Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp +Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp +Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp +Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp +Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp +Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp +KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp +Kokkos_MemoryPool.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) +Kokkos_Cuda_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp +Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp +Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp +Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) +Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp +Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp +Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) +Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp + $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp +Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) +Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp +endif + +Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp +Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp + diff --git a/lib/kokkos/README b/lib/kokkos/README new file mode 100644 index 0000000000..25b3778d95 --- /dev/null +++ b/lib/kokkos/README @@ -0,0 +1,135 @@ +Kokkos implements a programming model in C++ for writing performance portable +applications targeting all major HPC platforms. For that purpose it provides +abstractions for both parallel execution of code and data management. +Kokkos is designed to target complex node architectures with N-level memory +hierarchies and multiple types of execution resources. It currently can use +OpenMP, Pthreads and CUDA as backend programming models. + +The core developers of Kokkos are Carter Edwards and Christian Trott +at the Computer Science Research Institute of the Sandia National +Laboratories. + +The KokkosP interface and associated tools are developed by the Application +Performance Team and Kokkos core developers at Sandia National Laboratories. 
+ +To learn more about Kokkos consider watching one of our presentations: +GTC 2015: + http://on-demand.gputechconf.com/gtc/2015/video/S5166.html + http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf + +A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version +and feedback is greatly appreciated. + +A separate repository with extensive tutorial material can be found under +https://github.com/kokkos/kokkos-tutorials. + +If you have a patch to contribute please feel free to issue a pull request against +the develop branch. For major contributions it is better to contact us first +for guidance. + +For questions please send an email to +kokkos-users@software.sandia.gov + +For non-public questions send an email to +hcedwar(at)sandia.gov and crtrott(at)sandia.gov + +============================================================================ +====Requirements============================================================ +============================================================================ + +Primary tested compilers are: + GCC 4.7.2 + GCC 4.8.4 + GCC 4.9.2 + GCC 5.1.0 + Intel 14.0.4 + Intel 15.0.2 + Intel 16.0.1 + Clang 3.5.2 + Clang 3.6.1 + +Secondary tested compilers are: + CUDA 6.5 (with gcc 4.7.2) + CUDA 7.0 (with gcc 4.7.2) + CUDA 7.5 (with gcc 4.8.4) + +Other compilers working: + PGI 15.4 + IBM XL 13.1.2 + Cygwin 2.1.0 64bit with gcc 4.9.3 + +Primary tested compilers are passing in release mode +with warnings as errors. We are using the following set +of flags: +GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits + -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized +Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized +Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized + +Secondary compilers are passing without -Werror. +Other compilers are tested occasionally. 
+ +============================================================================ +====Getting started========================================================= +============================================================================ + +In the 'example/tutorial' directory you will find step by step tutorial +examples which explain many of the features of Kokkos. They work with +simple Makefiles. To build with g++ and OpenMP simply type 'make openmp' +in the 'example/tutorial' directory. This will build all examples in the +subfolders. + +============================================================================ +====Running Unit Tests====================================================== +============================================================================ + +To run the unit tests create a build directory and run the following commands + +KOKKOS_PATH/generate_makefile.bash +make build-test +make test + +Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as +changing the device type for which to build. + +============================================================================ +====Install the library===================================================== +============================================================================ + +To install Kokkos as a library create a build directory and run the following commands + +KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH +make lib +make install + +Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as +changing the device type for which to build. + +============================================================================ +====CMakeFiles============================================================== +============================================================================ + +The CMake files contained in this repository require Tribits and are used +for integration with Trilinos. They do not currently support a standalone +CMake build. 
+ +=========================================================================== +====Kokkos and CUDA UVM==================================================== +=========================================================================== + +Kokkos does support UVM as a specific memory space called CudaUVMSpace. +Allocations made with that space are accessible from host and device. +You can tell Kokkos to use that as the default space for Cuda allocations. +In either case UVM comes with a number of restrictions: +(i) You can't access allocations on the host while a kernel is potentially +running. This will lead to segfaults. To avoid that you either need to +call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or +you can set the environment variable CUDA_LAUNCH_BLOCKING=1. +Furthermore in multi socket multi GPU machines, UVM defaults to using +zero copy allocations for technical reasons related to using multiple +GPUs from the same process. If an executable doesn't do that (e.g. each +MPI rank of an application uses a single GPU [can be the same GPU for +multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. +This will enforce proper UVM allocations, but can lead to errors if +more than a single GPU is used by a single process. 
+ diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt new file mode 100644 index 0000000000..7853184a54 --- /dev/null +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -0,0 +1,10 @@ + + +TRIBITS_SUBPACKAGE(Algorithms) + +ADD_SUBDIRECTORY(src) + +TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) +#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) + +TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake new file mode 100644 index 0000000000..1d71d8af34 --- /dev/null +++ b/lib/kokkos/algorithms/cmake/Dependencies.cmake @@ -0,0 +1,5 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_PACKAGES KokkosCore + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in new file mode 100644 index 0000000000..67334b70f3 --- /dev/null +++ b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in @@ -0,0 +1,4 @@ +#ifndef KOKKOS_ALGORITHMS_CONFIG_H +#define KOKKOS_ALGORITHMS_CONFIG_H + +#endif diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt new file mode 100644 index 0000000000..dfbf3323c2 --- /dev/null +++ b/lib/kokkos/algorithms/src/CMakeLists.txt @@ -0,0 +1,21 @@ + +TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +FILE(GLOB HEADERS *.hpp) +FILE(GLOB SOURCES *.cpp) +LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) + +#----------------------------------------------------------------------------- + +TRIBITS_ADD_LIBRARY( + kokkosalgorithms + HEADERS ${HEADERS} + SOURCES ${SOURCES} + DEPLIBS + ) + diff --git a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp 
b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp new file mode 100644 index 0000000000..192b1d64f8 --- /dev/null +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -0,0 +1,1744 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_RANDOM_HPP +#define KOKKOS_RANDOM_HPP + +#include +#include +#include +#include +#include + +/// \file Kokkos_Random.hpp +/// \brief Pseudorandom number generators +/// +/// These generators are based on Vigna, Sebastiano (2014). "An +/// experimental exploration of Marsaglia's xorshift generators, +/// scrambled." See: http://arxiv.org/abs/1402.6246 + +namespace Kokkos { + + /*Template functions to get equidistributed random numbers from a generator for a specific Scalar type + + template + struct rand{ + + //Max value returned by draw(Generator& gen) + KOKKOS_INLINE_FUNCTION + static Scalar max(); + + //Returns a value between zero and max() + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen); + + //Returns a value between zero and range() + //Note: for floating point values range can be larger than max() + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen, const Scalar& range){} + + //Return value between start and end + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen, const Scalar& start, const Scalar& end); + }; + + The Random number generators themselves have two components a state-pool and the actual generator + A state-pool manages a number of generators, so that each active thread is able to grep its own. 
+ This allows the generation of random numbers which are independent between threads. Note that + in contrast to CuRand none of the functions of the pool (or the generator) are collectives, + i.e. all functions can be called inside conditionals. + + template + class Pool { + public: + //The Kokkos device type + typedef Device device_type; + //The actual generator type + typedef Generator generator_type; + + //Default constructor: does not initialize a pool + Pool(); + + //Initializing constructor: calls init(seed,Device_Specific_Number); + Pool(unsigned int seed); + + //Initialize Pool with seed as a starting seed with a pool_size of num_states + //The Random_XorShift64 generator is used in serial to initialize all states, + //thus the initialization process is platform independent and deterministic. + void init(unsigned int seed, int num_states); + + //Get a generator. This will lock one of the states, guaranteeing that each thread + //will have its private generator. Note: on Cuda getting a state involves atomics, + //and is thus not deterministic! + generator_type get_state(); + + //Give a state back to the pool. This unlocks the state, and writes the modified + //state of the generator back to the pool. + void free_state(generator_type gen); + + } + + template + class Generator { + public: + //The Kokkos device type + typedef DeviceType device_type; + + //Max return values of respective [X]rand[S]() functions + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + + + //Init with a state and the idx with respect to pool. 
Note: in serial the + //Generator can be used by just giving it the necessary state arguments + KOKKOS_INLINE_FUNCTION + Generator (STATE_ARGUMENTS, int state_idx = 0); + + //Draw a equidistributed uint32_t in the range (0,MAX_URAND] + KOKKOS_INLINE_FUNCTION + uint32_t urand(); + + //Draw a equidistributed uint64_t in the range (0,MAX_URAND64] + KOKKOS_INLINE_FUNCTION + uint64_t urand64(); + + //Draw a equidistributed uint32_t in the range (0,range] + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range); + + //Draw a equidistributed uint32_t in the range (start,end] + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ); + + //Draw a equidistributed uint64_t in the range (0,range] + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range); + + //Draw a equidistributed uint64_t in the range (start,end] + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ); + + //Draw a equidistributed int in the range (0,MAX_RAND] + KOKKOS_INLINE_FUNCTION + int rand(); + + //Draw a equidistributed int in the range (0,range] + KOKKOS_INLINE_FUNCTION + int rand(const int& range); + + //Draw a equidistributed int in the range (start,end] + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ); + + //Draw a equidistributed int64_t in the range (0,MAX_RAND64] + KOKKOS_INLINE_FUNCTION + int64_t rand64(); + + //Draw a equidistributed int64_t in the range (0,range] + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range); + + //Draw a equidistributed int64_t in the range (start,end] + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ); + + //Draw a equidistributed float in the range (0,1.0] + KOKKOS_INLINE_FUNCTION + float frand(); + + //Draw a equidistributed float in the range (0,range] + KOKKOS_INLINE_FUNCTION + float frand(const float& range); + + //Draw a equidistributed float in the range (start,end] + KOKKOS_INLINE_FUNCTION + float frand(const 
float& start, const float& end ); + + //Draw a equidistributed double in the range (0,1.0] + KOKKOS_INLINE_FUNCTION + double drand(); + + //Draw a equidistributed double in the range (0,range] + KOKKOS_INLINE_FUNCTION + double drand(const double& range); + + //Draw a equidistributed double in the range (start,end] + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ); + + //Draw a standard normal distributed double + KOKKOS_INLINE_FUNCTION + double normal() ; + + //Draw a normal distributed double with given mean and standard deviation + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0); + } + + //Additional Functions: + + //Fills view with random numbers in the range (0,range] + template + void fill_random(ViewType view, PoolType pool, ViewType::value_type range); + + //Fills view with random numbers in the range (start,end] + template + void fill_random(ViewType view, PoolType pool, + ViewType::value_type start, ViewType::value_type end); + +*/ + + template + struct rand; + + + template + struct rand { + + KOKKOS_INLINE_FUNCTION + static short max(){return 127;} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) + {return short((gen.rand()&0xff+256)%256);} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& range) + {return char(gen.rand(range));} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& start, const char& end) + {return char(gen.rand(start,end));} + + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static short max(){return 32767;} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) + {return short((gen.rand()&0xffff+65536)%32768);} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& range) + {return short(gen.rand(range));} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& start, const short& end) + {return short(gen.rand(start,end));} + + }; + + template + struct rand { + 
KOKKOS_INLINE_FUNCTION + static int max(){return Generator::MAX_RAND;} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen) + {return gen.rand();} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& range) + {return gen.rand(range);} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& start, const int& end) + {return gen.rand(start,end);} + + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned int max () { + return Generator::MAX_URAND; + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw (Generator& gen) { + return gen.urand (); + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen, const unsigned int& range) { + return gen.urand (range); + } + KOKKOS_INLINE_FUNCTION + static unsigned int + draw (Generator& gen, const unsigned int& start, const unsigned int& end) { + return gen.urand (start, end); + } + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static long max () { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast (Generator::MAX_RAND) : + static_cast (Generator::MAX_RAND64); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast (gen.rand ()) : + static_cast (gen.rand64 ()); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen, const long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? 
+ static_cast (gen.rand (static_cast (range))) : + static_cast (gen.rand64 (range)); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen, const long& start, const long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast (gen.rand (static_cast (start), + static_cast (end))) : + static_cast (gen.rand64 (start, end)); + } + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long max () { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast (Generator::MAX_URAND) : + static_cast (Generator::MAX_URAND64); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast (gen.urand ()) : + static_cast (gen.urand64 ()); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen, const unsigned long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast (gen.urand (static_cast (range))) : + static_cast (gen.urand64 (range)); + } + KOKKOS_INLINE_FUNCTION + static unsigned long + draw (Generator& gen, const unsigned long& start, const unsigned long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? 
+ static_cast (gen.urand (static_cast (start), + static_cast (end))) : + static_cast (gen.urand64 (start, end)); + } + }; + + // NOTE (mfh 26 oct 2014) This is a partial specialization for long + // long, a C99 / C++11 signed type which is guaranteed to be at + // least 64 bits. Do NOT write a partial specialization for + // int64_t!!! This is just a typedef! It could be either long or + // long long. We don't know which a priori, and I've seen both. + // The types long and long long are guaranteed to differ, so it's + // always safe to specialize for both. + template + struct rand { + KOKKOS_INLINE_FUNCTION + static long long max () { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return Generator::MAX_RAND64; + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (); + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen, const long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (range); + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen, const long long& start, const long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (start, end); + } + }; + + // NOTE (mfh 26 oct 2014) This is a partial specialization for + // unsigned long long, a C99 / C++11 unsigned type which is + // guaranteed to be at least 64 bits. Do NOT write a partial + // specialization for uint64_t!!! This is just a typedef! It could + // be either unsigned long or unsigned long long. We don't know + // which a priori, and I've seen both. The types unsigned long and + // unsigned long long are guaranteed to differ, so it's always safe + // to specialize for both. 
+ template + struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long long max () { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. + return Generator::MAX_URAND64; + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. + return gen.urand64 (); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw (Generator& gen, const unsigned long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.urand64 (range); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long + draw (Generator& gen, const unsigned long long& start, const unsigned long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.urand64 (start, end); + } + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static float max(){return 1.0f;} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen) + {return gen.frand();} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& range) + {return gen.frand(range);} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& start, const float& end) + {return gen.frand(start,end);} + + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static double max(){return 1.0;} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen) + {return gen.drand();} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& range) + {return gen.drand(range);} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& start, const double& end) + {return gen.drand(start,end);} + + }; + + template + struct rand > { + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex max () { + return ::Kokkos::complex (1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen) { + const float re = gen.frand (); + const float im = gen.frand (); + 
return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& range) { + const float re = gen.frand (real (range)); + const float im = gen.frand (imag (range)); + return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& start, const ::Kokkos::complex& end) { + const float re = gen.frand (real (start), real (end)); + const float im = gen.frand (imag (start), imag (end)); + return ::Kokkos::complex (re, im); + } + }; + + template + struct rand > { + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex max () { + return ::Kokkos::complex (1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen) { + const double re = gen.drand (); + const double im = gen.drand (); + return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& range) { + const double re = gen.drand (real (range)); + const double im = gen.drand (imag (range)); + return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& start, const ::Kokkos::complex& end) { + const double re = gen.drand (real (start), real (end)); + const double im = gen.drand (imag (start), imag (end)); + return ::Kokkos::complex (re, im); + } + }; + + template + class Random_XorShift64_Pool; + + template + class Random_XorShift64 { + private: + uint64_t state_; + const int state_idx_; + friend class Random_XorShift64_Pool; + public: + + typedef DeviceType device_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffff/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffLL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift64 (uint64_t state, int state_idx = 0) + : state_(state),state_idx_(state_idx){} + + 
KOKKOS_INLINE_FUNCTION + uint32_t urand() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + + uint64_t tmp = state_ * 2685821657736338717ULL; + tmp = tmp>>16; + return static_cast(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + return (state_ * 2685821657736338717ULL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + tmp = urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + tmp = urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + tmp = rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + tmp = rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + 
KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return drand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + + }; + + template + class Random_XorShift64_Pool { + private: + typedef View lock_type; + typedef View state_data_type; + lock_type locks_; + state_data_type state_; + int num_states_; + + public: + typedef Random_XorShift64 generator_type; + typedef DeviceType device_type; + + Random_XorShift64_Pool() { + num_states_ = 0; + } + Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,DeviceType::max_hardware_threads()); + } + + Random_XorShift64_Pool(const Random_XorShift64_Pool& src): + locks_(src.locks_), + state_(src.state_), + num_states_(src.num_states_) + {} + + Random_XorShift64_Pool operator = (const Random_XorShift64_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + num_states_ = src.num_states_; + return *this; + } + + void init(uint64_t seed, int num_states) { + num_states_ = num_states; + + locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_); + state_ = state_data_type("Kokkos::Random_XorShift64::state",num_states_); + + typename state_data_type::HostMirror h_state = 
create_mirror_view(state_); + typename lock_type::HostMirror h_lock = create_mirror_view(locks_); + + // Execute on the HostMirror's default execution space. + Random_XorShift64 gen(seed,0); + for(int i = 0; i < 17; i++) + gen.rand(); + for(int i = 0; i < num_states_; i++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i) = (((static_cast(n1)) & 0xffff)<<00) | + (((static_cast(n2)) & 0xffff)<<16) | + (((static_cast(n3)) & 0xffff)<<32) | + (((static_cast(n4)) & 0xffff)<<48); + h_lock(i) = 0; + } + deep_copy(state_,h_state); + deep_copy(locks_,h_lock); + } + + KOKKOS_INLINE_FUNCTION + Random_XorShift64 get_state() const { + const int i = DeviceType::hardware_thread_id();; + return Random_XorShift64(state_(i),i); + } + + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift64& state) const { + state_(state.state_idx_) = state.state_; + } + }; + + + template + class Random_XorShift1024_Pool; + + template + class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t state_[16]; + friend class Random_XorShift1024_Pool; + public: + + typedef DeviceType device_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 (uint64_t* state, int p, int state_idx = 0): + p_(p),state_idx_(state_idx){ + for(int i=0 ; i<16; i++) + state_[i] = state[i]; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[ p_ ]; + uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; + tmp = tmp>>16; + return static_cast(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[ p_ ]; + uint64_t 
state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + tmp = urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + tmp = urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + tmp = rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + tmp = rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return 
frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return frand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + }; + + + template + class Random_XorShift1024_Pool { + private: + typedef View int_view_type; + typedef View state_data_type; + + int_view_type locks_; + state_data_type state_; + int_view_type p_; + int num_states_; + + public: + typedef Random_XorShift1024 generator_type; + + typedef DeviceType device_type; + + Random_XorShift1024_Pool() { + num_states_ = 0; + } + + inline + Random_XorShift1024_Pool(uint64_t seed){ + num_states_ = 0; + init(seed,DeviceType::max_hardware_threads()); + } + + Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src): + locks_(src.locks_), + state_(src.state_), + p_(src.p_), + num_states_(src.num_states_) + {} + + Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + p_ = src.p_; + num_states_ = src.num_states_; + return *this; + } + + inline + void init(uint64_t seed, int num_states) { + num_states_ = num_states; + + locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_); + state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_); + p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + 
typename int_view_type::HostMirror h_lock = create_mirror_view(locks_); + typename int_view_type::HostMirror h_p = create_mirror_view(p_); + + // Execute on the HostMirror's default execution space. + Random_XorShift64 gen(seed,0); + for(int i = 0; i < 17; i++) + gen.rand(); + for(int i = 0; i < num_states_; i++) { + for(int j = 0; j < 16 ; j++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i,j) = (((static_cast(n1)) & 0xffff)<<00) | + (((static_cast(n2)) & 0xffff)<<16) | + (((static_cast(n3)) & 0xffff)<<32) | + (((static_cast(n4)) & 0xffff)<<48); + } + h_p(i) = 0; + h_lock(i) = 0; + } + deep_copy(state_,h_state); + deep_copy(locks_,h_lock); + } + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 get_state() const { + const int i = DeviceType::hardware_thread_id(); + return Random_XorShift1024(&state_(i,0),p_(i),i); + }; + + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift1024& state) const { + for(int i = 0; i<16; i++) + state_(state.state_idx_,i) = state.state_[i]; + p_(state.state_idx_) = state.p_; + } + }; + +#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__) + + template<> + class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t* state_; + friend class Random_XorShift1024_Pool; + public: + + typedef Kokkos::Cuda device_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 (uint64_t* state, int p, int state_idx = 0): + p_(p),state_idx_(state_idx),state_(state){ + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[ p_ ]; + uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; + tmp = 
tmp>>16; + return static_cast(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[ p_ ]; + uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * 
urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return frand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + }; + +template<> +inline +Random_XorShift64_Pool::Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,4*32768); +} + +template<> +KOKKOS_INLINE_FUNCTION +Random_XorShift64 Random_XorShift64_Pool::get_state() const { +#ifdef __CUDA_ARCH__ + const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; + int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * + blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; + while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { + i+=blockDim.x*blockDim.y*blockDim.z; + if(i>=num_states_) {i = i_offset;} + } + + return Random_XorShift64(state_(i),i); +#else + return Random_XorShift64(state_(0),0); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void Random_XorShift64_Pool::free_state(const Random_XorShift64 &state) const { +#ifdef __CUDA_ARCH__ + state_(state.state_idx_) = state.state_; + locks_(state.state_idx_) = 0; + return; +#endif +} + + +template<> +inline +Random_XorShift1024_Pool::Random_XorShift1024_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,4*32768); +} + +template<> 
+KOKKOS_INLINE_FUNCTION +Random_XorShift1024 Random_XorShift1024_Pool::get_state() const { +#ifdef __CUDA_ARCH__ + const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; + int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * + blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; + while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { + i+=blockDim.x*blockDim.y*blockDim.z; + if(i>=num_states_) {i = i_offset;} + } + + return Random_XorShift1024(&state_(i,0), p_(i), i); +#else + return Random_XorShift1024(&state_(0,0), p_(0), 0); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void Random_XorShift1024_Pool::free_state(const Random_XorShift1024 &state) const { +#ifdef __CUDA_ARCH__ + for(int i=0; i<16; i++) + state_(state.state_idx_,i) = state.state_[i]; + locks_(state.state_idx_) = 0; + return; +#endif +} + + +#endif + + + +template +struct fill_random_functor_range; +template +struct fill_random_functor_begin_end; + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = 
rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + 
KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef 
rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + 
KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (unsigned int i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(unsigned int j=0;j +void fill_random(ViewType a, RandomPool g, typename 
ViewType::const_value_type range) { + int64_t LDA = a.dimension_0(); + if(LDA>0) + parallel_for((LDA+127)/128,fill_random_functor_range(a,g,range)); +} + +template +void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) { + int64_t LDA = a.dimension_0(); + if(LDA>0) + parallel_for((LDA+127)/128,fill_random_functor_begin_end(a,g,begin,end)); +} +} + +#endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp new file mode 100644 index 0000000000..6123ce978c --- /dev/null +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -0,0 +1,496 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef KOKKOS_SORT_HPP_ +#define KOKKOS_SORT_HPP_ + +#include + +#include + +namespace Kokkos { + + namespace SortImpl { + + template + struct CopyOp; + + template + struct CopyOp { + template + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + dst(i_dst) = src(i_src); + } + }; + + template + struct CopyOp { + template + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + for(int j = 0;j< (int) dst.dimension_1(); j++) + dst(i_dst,j) = src(i_src,j); + } + }; + + template + struct CopyOp { + template + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + for(int j = 0; j +class BinSort { + + +public: + template + struct bin_sort_sort_functor { + typedef ExecutionSpace execution_space; + typedef typename ValuesViewType::non_const_type values_view_type; + typedef typename ValuesViewType::const_type const_values_view_type; + Kokkos::View > values; + values_view_type sorted_values; + typename PermuteViewType::const_type sort_order; + bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_): + 
values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + //printf("Sort: %i %i\n",i,sort_order(i)); + CopyOp::copy(sorted_values,i,values,sort_order(i)); + } + }; + + typedef ExecutionSpace execution_space; + typedef BinSortOp bin_op_type; + + struct bin_count_tag {}; + struct bin_offset_tag {}; + struct bin_binning_tag {}; + struct bin_sort_bins_tag {}; + +public: + typedef SizeType size_type; + typedef size_type value_type; + + typedef Kokkos::View offset_type; + typedef Kokkos::View bin_count_type; + + + typedef Kokkos::View const_key_view_type; + typedef Kokkos::View > const_rnd_key_view_type; + + typedef typename KeyViewType::non_const_value_type non_const_key_scalar; + typedef typename KeyViewType::const_value_type const_key_scalar; + +private: + const_key_view_type keys; + const_rnd_key_view_type keys_rnd; + +public: + BinSortOp bin_op; + + offset_type bin_offsets; + + Kokkos::View > bin_count_atomic; + bin_count_type bin_count_const; + + offset_type sort_order; + + bool sort_within_bins; + +public: + + // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + :keys(keys_),keys_rnd(keys_), bin_op(bin_op_) { + + bin_count_atomic = Kokkos::View("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins()); + sort_order = offset_type("PermutationVector",keys.dimension_0()); + sort_within_bins = sort_within_bins_; + } + + // Create the permutation vector, the bin_offset array and the bin_count array. 
Can be called again if keys changed + void create_permute_vector() { + Kokkos::parallel_for (Kokkos::RangePolicy (0,keys.dimension_0()),*this); + Kokkos::parallel_scan(Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); + + Kokkos::deep_copy(bin_count_atomic,0); + Kokkos::parallel_for (Kokkos::RangePolicy (0,keys.dimension_0()),*this); + + if(sort_within_bins) + Kokkos::parallel_for (Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); + } + + // Sort a view with respect ot the first dimension using the permutation array + template + void sort(ValuesViewType values) { + ValuesViewType sorted_values = ValuesViewType("Copy", + values.dimension_0(), + values.dimension_1(), + values.dimension_2(), + values.dimension_3(), + values.dimension_4(), + values.dimension_5(), + values.dimension_6(), + values.dimension_7()); + + parallel_for(values.dimension_0(), + bin_sort_sort_functor >(values,sorted_values,sort_order)); + + deep_copy(values,sorted_values); + } + + // Get the permutation vector + KOKKOS_INLINE_FUNCTION + offset_type get_permute_vector() const { return sort_order;} + + // Get the start offsets for each bin + KOKKOS_INLINE_FUNCTION + offset_type get_bin_offsets() const { return bin_offsets;} + + // Get the count for each bin + KOKKOS_INLINE_FUNCTION + bin_count_type get_bin_count() const {return bin_count_const;} + +public: + KOKKOS_INLINE_FUNCTION + void operator() (const bin_count_tag& tag, const int& i) const { + bin_count_atomic(bin_op.bin(keys,i))++; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const { + if(final) { + bin_offsets(i) = offset; + } + offset+=bin_count_const(i); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_binning_tag& tag, const int& i) const { + const int bin = bin_op.bin(keys,i); + const int count = bin_count_atomic(bin)++; + + sort_order(bin_offsets(bin) + count) = i; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_sort_bins_tag& 
tag, const int&i ) const { + bool sorted = false; + int upper_bound = bin_offsets(i)+bin_count_const(i); + while(!sorted) { + sorted = true; + int old_idx = sort_order(bin_offsets(i)); + int new_idx; + for(int k=bin_offsets(i)+1; k +struct DefaultBinOp1D { + const int max_bins_; + const double mul_; + typename KeyViewType::const_value_type range_; + typename KeyViewType::const_value_type min_; + + //Construct BinOp with number of bins, minimum value and maxuimum value + DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + typename KeyViewType::const_value_type max ) + :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {} + + //Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION + int bin(ViewType& keys, const int& i) const { + return int(mul_*(keys(i)-min_)); + } + + //Return maximum bin index + 1 + KOKKOS_INLINE_FUNCTION + int max_bins() const { + return max_bins_; + } + + //Compare to keys within a bin if true new_val will be put before old_val + template + KOKKOS_INLINE_FUNCTION + bool operator()(ViewType& keys, iType1& i1, iType2& i2) const { + return keys(i1) +struct DefaultBinOp3D { + int max_bins_[3]; + double mul_[3]; + typename KeyViewType::non_const_value_type range_[3]; + typename KeyViewType::non_const_value_type min_[3]; + + DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[] ) + { + max_bins_[0] = max_bins__[0]+1; + max_bins_[1] = max_bins__[1]+1; + max_bins_[2] = max_bins__[2]+1; + mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]); + mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]); + mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]); + range_[0] = max[0]-min[0]; + range_[1] = max[1]-min[1]; + range_[2] = max[2]-min[2]; + min_[0] = min[0]; + min_[1] = min[1]; + min_[2] = min[2]; + } + + template + KOKKOS_INLINE_FUNCTION + int bin(ViewType& keys, const int& i) const { + return int( 
(((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) + + int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) + + int(mul_[2]*(keys(i,2)-min_[2]))); + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { + return max_bins_[0]*max_bins_[1]*max_bins_[2]; + } + + template + KOKKOS_INLINE_FUNCTION + bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const { + if (keys(i1,0)>keys(i2,0)) return true; + else if (keys(i1,0)==keys(i2,0)) { + if (keys(i1,1)>keys(i2,1)) return true; + else if (keys(i1,1)==keys(i2,2)) { + if (keys(i1,2)>keys(i2,2)) return true; + } + } + return false; + } +}; + +template +struct min_max { + Scalar min; + Scalar max; + bool init; + + KOKKOS_INLINE_FUNCTION + min_max() { + min = 0; + max = 0; + init = 0; + } + + KOKKOS_INLINE_FUNCTION + min_max (const min_max& val) { + min = val.min; + max = val.max; + init = val.init; + } + + KOKKOS_INLINE_FUNCTION + min_max operator = (const min_max& val) { + min = val.min; + max = val.max; + init = val.init; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+= (const Scalar& val) { + if(init) { + min = minval?max:val; + } else { + min = val; + max = val; + init = 1; + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (const min_max& val) { + if(init && val.init) { + min = minval.max?max:val.max; + } else { + if(val.init) { + min = val.min; + max = val.max; + init = 1; + } + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (volatile const Scalar& val) volatile { + if(init) { + min = minval?max:val; + } else { + min = val; + max = val; + init = 1; + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (volatile const min_max& val) volatile { + if(init && val.init) { + min = minval.max?max:val.max; + } else { + if(val.init) { + min = val.min; + max = val.max; + init = 1; + } + } + } +}; + + +template +struct min_max_functor { + typedef typename ViewType::execution_space execution_space; + ViewType view; + typedef min_max value_type; + min_max_functor (const ViewType view_):view(view_) { + } + + 
KOKKOS_INLINE_FUNCTION + void operator()(const size_t& i, value_type& val) const { + val += view(i); + } +}; + +template +bool try_std_sort(ViewType view) { + bool possible = true; +#if ! KOKKOS_USING_EXP_VIEW + size_t stride[8]; + view.stride(stride); +#else + size_t stride[8] = { view.stride_0() + , view.stride_1() + , view.stride_2() + , view.stride_3() + , view.stride_4() + , view.stride_5() + , view.stride_6() + , view.stride_7() + }; +#endif + possible = possible && Impl::is_same::value; + possible = possible && (ViewType::Rank == 1); + possible = possible && (stride[0] == 1); + if(possible) { + std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0()); + } + return possible; +} + +} + +template +void sort(ViewType view, bool always_use_kokkos_sort = false) { + if(!always_use_kokkos_sort) { + if(SortImpl::try_std_sort(view)) return; + } + + typedef SortImpl::DefaultBinOp1D CompType; + SortImpl::min_max val; + parallel_reduce(view.dimension_0(),SortImpl::min_max_functor(view),val); + BinSort bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true); + bin_sort.create_permute_vector(); + bin_sort.sort(view); +} + +/*template +void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) { + +}*/ + +} + +#endif diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt new file mode 100644 index 0000000000..654104b44e --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -0,0 +1,38 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +SET(SOURCES + UnitTestMain.cpp + TestCuda.cpp + ) + +SET(LIBRARIES kokkoscore) + +IF(Kokkos_ENABLE_OpenMP) + LIST( APPEND SOURCES + TestOpenMP.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_Serial) + LIST( APPEND SOURCES + TestSerial.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_Pthread) + LIST( APPEND SOURCES + TestThreads.cpp + ) 
+ENDIF() + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest + SOURCES ${SOURCES} + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile new file mode 100644 index 0000000000..5d79364c52 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -0,0 +1,92 @@ +KOKKOS_PATH = ../.. + +GTEST_PATH = ../../TPL/gtest + +vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests + +default: build_all + echo "End Build" + + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests + +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_OpenMP + TEST_TARGETS += test-openmp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Serial + TEST_TARGETS += test-serial +endif + +KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda + +KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o 
KokkosAlgorithms_UnitTest_Threads + +KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP + +KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial + +test-cuda: KokkosAlgorithms_UnitTest_Cuda + ./KokkosAlgorithms_UnitTest_Cuda + +test-threads: KokkosAlgorithms_UnitTest_Threads + ./KokkosAlgorithms_UnitTest_Threads + +test-openmp: KokkosAlgorithms_UnitTest_OpenMP + ./KokkosAlgorithms_UnitTest_OpenMP + +test-serial: KokkosAlgorithms_UnitTest_Serial + ./KokkosAlgorithms_UnitTest_Serial + +build_all: $(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp new file mode 100644 index 0000000000..d19c778c46 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp @@ -0,0 +1,110 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include + +#include + +#include + +#ifdef KOKKOS_HAVE_CUDA + +#include +#include + +namespace Test { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + static void TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +void cuda_test_random_xorshift64( int num_draws ) +{ + Impl::test_random >(num_draws); +} + +void cuda_test_random_xorshift1024( int num_draws ) +{ + Impl::test_random >(num_draws); +} + + +#define CUDA_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( cuda, Random_XorShift64 ) { \ + cuda_test_random_xorshift64(num_draws); \ + } + +#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( cuda, Random_XorShift1024 ) { \ + cuda_test_random_xorshift1024(num_draws); \ + } + +#define CUDA_SORT_UNSIGNED( size ) \ + TEST_F( cuda, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Cuda, unsigned >(size); \ + } + +CUDA_RANDOM_XORSHIFT64( 132141141 ) +CUDA_RANDOM_XORSHIFT1024( 52428813 ) +CUDA_SORT_UNSIGNED(171) + +#undef CUDA_RANDOM_XORSHIFT64 +#undef CUDA_RANDOM_XORSHIFT1024 +#undef CUDA_SORT_UNSIGNED +} + +#endif /* #ifdef KOKKOS_HAVE_CUDA */ + diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp new file mode 100644 index 0000000000..4b06dffcb6 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +//---------------------------------------------------------------------------- +#include +#include +#include + +namespace Test { + +#ifdef KOKKOS_HAVE_OPENMP +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned threads_count = omp_get_max_threads(); + + if ( Kokkos::hwloc::available() ) { + threads_count = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa(); + } + + Kokkos::OpenMP::initialize( threads_count ); + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + } +}; + +#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( openmp, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( openmp, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define OPENMP_SORT_UNSIGNED( size ) \ + TEST_F( openmp, SortUnsigned ) { \ + Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \ + } + +OPENMP_RANDOM_XORSHIFT64( 10240000 ) +OPENMP_RANDOM_XORSHIFT1024( 10130144 ) +OPENMP_SORT_UNSIGNED(171) + +#undef OPENMP_RANDOM_XORSHIFT64 +#undef OPENMP_RANDOM_XORSHIFT1024 +#undef OPENMP_SORT_UNSIGNED +#endif +} // namespace test + diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp new file mode 100644 index 0000000000..eade74ed93 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -0,0 +1,476 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_RANDOM_HPP +#define KOKKOS_TEST_RANDOM_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Test { + +namespace Impl{ + +// This test runs the random number generators and uses some statistical tests to +// check the 'goodness' of the random numbers: +// (i) mean: the mean is expected to be 0.5*RAND_MAX +// (ii) variance: the variance is 1/3*mean*mean +// (iii) covariance: the covariance is 0 +// (iv) 1-tupledistr: the mean, variance and covariance of a 1D histogram of random numbers +// (v) 3-tupledistr: the mean, variance and covariance of a 3D histogram of random numbers + +#define HIST_DIM3D 24 +#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D) + +/* Reduction value for the randomness tests: a sample count, running sums for mean, variance and covariance, and the observed minimum and maximum. operator+= adds the sums and keeps the extreme min and max of both operands. */ +struct RandomProperties { + uint64_t count; + double mean; + double variance; + double covariance; + double min; + double max; + + KOKKOS_INLINE_FUNCTION + RandomProperties() { + count = 0; + mean = 0.0; + variance = 0.0; + covariance = 0.0; + min = 1e64; + max = -1e64; + } + + KOKKOS_INLINE_FUNCTION + RandomProperties& operator+=(const RandomProperties& add) { + count += add.count; + mean += add.mean; + variance += add.variance; + covariance += add.covariance; + min = add.min<min?add.min:min; + max = add.max>max?add.max:max; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile RandomProperties& add) volatile { + count += add.count; + mean += add.mean; + variance += add.variance; + covariance += add.covariance; + min = add.min<min?add.min:min; + max = add.max>max?add.max:max; + } +}; + +template +struct test_random_functor { + typedef typename GeneratorPool::generator_type rnd_type; + + typedef RandomProperties value_type; + typedef typename GeneratorPool::device_type device_type; + + GeneratorPool rand_pool; + const double mean; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random 
numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View types below. + typedef Kokkos::View type_1d; + type_1d density_1d; + typedef Kokkos::View type_3d; + type_3d density_3d; + + test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) : + rand_pool (rand_pool_), + mean (0.5*Kokkos::rand::max ()), + density_1d (d1d), + density_3d (d3d) + {} + + KOKKOS_INLINE_FUNCTION + void operator() (int i, RandomProperties& prop) const { + using Kokkos::atomic_fetch_add; + + rnd_type rand_gen = rand_pool.get_state(); + for (int k = 0; k < 1024; ++k) { + const Scalar tmp = Kokkos::rand::draw(rand_gen); + prop.count++; + prop.mean += tmp; + prop.variance += (tmp-mean)*(tmp-mean); + const Scalar tmp2 = Kokkos::rand::draw(rand_gen); + prop.count++; + prop.mean += tmp2; + prop.variance += (tmp2-mean)*(tmp2-mean); + prop.covariance += (tmp-mean)*(tmp2-mean); + const Scalar tmp3 = Kokkos::rand::draw(rand_gen); + prop.count++; + prop.mean += tmp3; + prop.variance += (tmp3-mean)*(tmp3-mean); + prop.covariance += (tmp2-mean)*(tmp3-mean); + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to + // define an exclusive upper bound on the range of random + // numbers that draw() can generate. However, for the float + // specialization, some implementations might violate this upper + // bound, due to rounding error. Just in case, we have left an + // extra space at the end of each dimension of density_1d and + // density_3d. + // + // Please note that those extra entries might not get counted in + // the histograms. However, if Kokkos::rand is broken and only + // returns values of max(), the histograms will still catch this + // indirectly, since none of the other values will be filled in. 
+ + const Scalar theMax = Kokkos::rand::max (); + + const uint64_t ind1_1d = static_cast (1.0 * HIST_DIM1D * tmp / theMax); + const uint64_t ind2_1d = static_cast (1.0 * HIST_DIM1D * tmp2 / theMax); + const uint64_t ind3_1d = static_cast (1.0 * HIST_DIM1D * tmp3 / theMax); + + const uint64_t ind1_3d = static_cast (1.0 * HIST_DIM3D * tmp / theMax); + const uint64_t ind2_3d = static_cast (1.0 * HIST_DIM3D * tmp2 / theMax); + const uint64_t ind3_3d = static_cast (1.0 * HIST_DIM3D * tmp3 / theMax); + + atomic_fetch_add (&density_1d(ind1_1d), 1); + atomic_fetch_add (&density_1d(ind2_1d), 1); + atomic_fetch_add (&density_1d(ind3_1d), 1); + atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1); + } + rand_pool.free_state(rand_gen); + } +}; + +template +struct test_histogram1d_functor { + typedef RandomProperties value_type; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View type below. + typedef Kokkos::View type_1d; + type_1d density_1d; + double mean; + + test_histogram1d_functor (type_1d d1d, int num_draws) : + density_1d (d1d), + mean (1.0*num_draws/HIST_DIM1D*3) + { + printf ("Mean: %e\n", mean); + } + + KOKKOS_INLINE_FUNCTION void + operator() (const typename memory_space::size_type i, + RandomProperties& prop) const + { + typedef typename memory_space::size_type size_type; + const double count = density_1d(i); + prop.mean += count; + prop.variance += 1.0 * (count - mean) * (count - mean); + //prop.covariance += 1.0*count*count; + prop.min = count < prop.min ? count : prop.min; + prop.max = count > prop.max ? 
count : prop.max; + if (i < static_cast (HIST_DIM1D-1)) { + prop.covariance += (count - mean) * (density_1d(i+1) - mean); + } + } +}; + +template +struct test_histogram3d_functor { + typedef RandomProperties value_type; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View type below. + typedef Kokkos::View type_3d; + type_3d density_3d; + double mean; + + test_histogram3d_functor (type_3d d3d, int num_draws) : + density_3d (d3d), + mean (1.0*num_draws/HIST_DIM1D) + {} + + KOKKOS_INLINE_FUNCTION void + operator() (const typename memory_space::size_type i, + RandomProperties& prop) const + { + typedef typename memory_space::size_type size_type; + const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D), + (i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, + i % HIST_DIM3D); + prop.mean += count; + prop.variance += (count - mean) * (count - mean); + if (i < static_cast (HIST_DIM1D-1)) { + const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D), + ((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, + (i+1)%HIST_DIM3D); + prop.covariance += (count - mean) * (count_next - mean); + } + } +}; + +// +// Templated test that uses the above functors. 
+// +template +struct test_random_scalar { + typedef typename RandomGenerator::generator_type rnd_type; + + int pass_mean,pass_var,pass_covar; + int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar; + int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar; + + test_random_scalar (typename test_random_functor::type_1d& density_1d, + typename test_random_functor::type_3d& density_3d, + RandomGenerator& pool, + unsigned int num_draws) + { + using std::cerr; + using std::endl; + using Kokkos::parallel_reduce; + + { + cerr << " -- Testing randomness properties" << endl; + + RandomProperties result; + typedef test_random_functor functor_type; + parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); + + //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); + double tolerance = 2.0*sqrt(1.0/num_draws); + double mean_expect = 0.5*Kokkos::rand::max(); + double variance_expect = 1.0/3.0*mean_expect*mean_expect; + double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; + double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0; + double covariance_eps = result.covariance/num_draws/2/variance_expect; + pass_mean = ((-tolerance < mean_eps) && + ( tolerance > mean_eps)) ? 1:0; + pass_var = ((-tolerance < variance_eps) && + ( tolerance > variance_eps)) ? 1:0; + pass_covar = ((-1.4*tolerance < covariance_eps) && + ( 1.4*tolerance > covariance_eps)) ? 
1:0; + cerr << "Pass: " << pass_mean + << " " << pass_var + << " " << mean_eps + << " " << variance_eps + << " " << covariance_eps + << " || " << tolerance << endl; + } + { + cerr << " -- Testing 1-D histogram" << endl; + + RandomProperties result; + typedef test_histogram1d_functor functor_type; + parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result); + + double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double mean_expect = 1.0*num_draws*3/HIST_DIM1D; + double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); + double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D; + double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; + double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; + double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; + pass_hist1d_mean = ((-tolerance < mean_eps) && + ( tolerance > mean_eps)) ? 1:0; + pass_hist1d_var = ((-tolerance < variance_eps) && + ( tolerance > variance_eps)) ? 1:0; + pass_hist1d_covar = ((-tolerance < covariance_eps) && + ( tolerance > covariance_eps)) ? 
1:0; + + cerr << "Density 1D: " << mean_eps + << " " << variance_eps + << " " << (result.covariance/HIST_DIM1D/HIST_DIM1D) + << " || " << tolerance + << " " << result.min + << " " << result.max + << " || " << result.variance/HIST_DIM1D + << " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D) + << " || " << result.covariance/HIST_DIM1D + << " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D + << endl; + } + { + cerr << " -- Testing 3-D histogram" << endl; + + RandomProperties result; + typedef test_histogram3d_functor functor_type; + parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result); + + double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double mean_expect = 1.0*num_draws/HIST_DIM1D; + double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); + double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D; + double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; + double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; + double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; + pass_hist3d_mean = ((-tolerance < mean_eps) && + ( tolerance > mean_eps)) ? 1:0; + pass_hist3d_var = ((-tolerance < variance_eps) && + ( tolerance > variance_eps)) ? 1:0; + pass_hist3d_covar = ((-tolerance < covariance_eps) && + ( tolerance > covariance_eps)) ? 
1:0; + + cerr << "Density 3D: " << mean_eps + << " " << variance_eps + << " " << result.covariance/HIST_DIM1D/HIST_DIM1D + << " || " << tolerance + << " " << result.min + << " " << result.max << endl; + } + } +}; + +template +void test_random(unsigned int num_draws) +{ + using std::cerr; + using std::endl; + typename test_random_functor::type_1d density_1d("D1d"); + typename test_random_functor::type_3d density_3d("D3d"); + + cerr << "Test Scalar=int" << endl; + RandomGenerator pool(31891); + test_random_scalar test_int(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_int.pass_mean,1); + ASSERT_EQ( test_int.pass_var,1); + ASSERT_EQ( test_int.pass_covar,1); + ASSERT_EQ( test_int.pass_hist1d_mean,1); + ASSERT_EQ( test_int.pass_hist1d_var,1); + ASSERT_EQ( test_int.pass_hist1d_covar,1); + ASSERT_EQ( test_int.pass_hist3d_mean,1); + ASSERT_EQ( test_int.pass_hist3d_var,1); + ASSERT_EQ( test_int.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=unsigned int" << endl; + test_random_scalar test_uint(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_uint.pass_mean,1); + ASSERT_EQ( test_uint.pass_var,1); + ASSERT_EQ( test_uint.pass_covar,1); + ASSERT_EQ( test_uint.pass_hist1d_mean,1); + ASSERT_EQ( test_uint.pass_hist1d_var,1); + ASSERT_EQ( test_uint.pass_hist1d_covar,1); + ASSERT_EQ( test_uint.pass_hist3d_mean,1); + ASSERT_EQ( test_uint.pass_hist3d_var,1); + ASSERT_EQ( test_uint.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=int64_t" << endl; + test_random_scalar test_int64(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_int64.pass_mean,1); + ASSERT_EQ( test_int64.pass_var,1); + ASSERT_EQ( test_int64.pass_covar,1); + ASSERT_EQ( test_int64.pass_hist1d_mean,1); + ASSERT_EQ( test_int64.pass_hist1d_var,1); + ASSERT_EQ( test_int64.pass_hist1d_covar,1); + ASSERT_EQ( test_int64.pass_hist3d_mean,1); + ASSERT_EQ( test_int64.pass_hist3d_var,1); + ASSERT_EQ( 
test_int64.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=uint64_t" << endl; + test_random_scalar test_uint64(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_uint64.pass_mean,1); + ASSERT_EQ( test_uint64.pass_var,1); + ASSERT_EQ( test_uint64.pass_covar,1); + ASSERT_EQ( test_uint64.pass_hist1d_mean,1); + ASSERT_EQ( test_uint64.pass_hist1d_var,1); + ASSERT_EQ( test_uint64.pass_hist1d_covar,1); + ASSERT_EQ( test_uint64.pass_hist3d_mean,1); + ASSERT_EQ( test_uint64.pass_hist3d_var,1); + ASSERT_EQ( test_uint64.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=float" << endl; + test_random_scalar test_float(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_float.pass_mean,1); + ASSERT_EQ( test_float.pass_var,1); + ASSERT_EQ( test_float.pass_covar,1); + ASSERT_EQ( test_float.pass_hist1d_mean,1); + ASSERT_EQ( test_float.pass_hist1d_var,1); + ASSERT_EQ( test_float.pass_hist1d_covar,1); + ASSERT_EQ( test_float.pass_hist3d_mean,1); + ASSERT_EQ( test_float.pass_hist3d_var,1); + ASSERT_EQ( test_float.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=double" << endl; + test_random_scalar test_double(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_double.pass_mean,1); + ASSERT_EQ( test_double.pass_var,1); + ASSERT_EQ( test_double.pass_covar,1); + ASSERT_EQ( test_double.pass_hist1d_mean,1); + ASSERT_EQ( test_double.pass_hist1d_var,1); + ASSERT_EQ( test_double.pass_hist1d_covar,1); + ASSERT_EQ( test_double.pass_hist3d_mean,1); + ASSERT_EQ( test_double.pass_hist3d_var,1); + ASSERT_EQ( test_double.pass_hist3d_covar,1); +} +} + +} // namespace Test + +#endif //KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp new file mode 100644 index 0000000000..741cf97ae1 --- /dev/null +++ 
b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- + + +namespace Test { + +#ifdef KOKKOS_HAVE_SERIAL +class serial : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision (5) << std::scientific; + Kokkos::Serial::initialize (); + } + + static void TearDownTestCase () + { + Kokkos::Serial::finalize (); + } +}; + +#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( serial, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( serial, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define SERIAL_SORT_UNSIGNED( size ) \ + TEST_F( serial, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Serial, unsigned >(size); \ + } + +SERIAL_RANDOM_XORSHIFT64( 10240000 ) +SERIAL_RANDOM_XORSHIFT1024( 10130144 ) +SERIAL_SORT_UNSIGNED(171) + +#undef SERIAL_RANDOM_XORSHIFT64 +#undef SERIAL_RANDOM_XORSHIFT1024 +#undef SERIAL_SORT_UNSIGNED + +#endif // KOKKOS_HAVE_SERIAL +} // namespace Test + + diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp new file mode 100644 index 0000000000..ccbcbdd001 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -0,0 +1,206 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef TESTSORT_HPP_ +#define TESTSORT_HPP_ + +#include +#include +#include +#include + +namespace Test { + +namespace Impl{ + +template +struct is_sorted_struct { + typedef unsigned int value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + is_sorted_struct(Kokkos::View keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, unsigned int& count) const { + if(keys(i)>keys(i+1)) count++; + } +}; + +template +struct sum { + typedef double value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + sum(Kokkos::View keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, double& count) const { + count+=keys(i); + } +}; + +template +struct bin3d_is_sorted_struct { + typedef unsigned int value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + int max_bins; + Scalar min; + Scalar max; + + bin3d_is_sorted_struct(Kokkos::View keys_,int max_bins_,Scalar min_,Scalar max_): + keys(keys_),max_bins(max_bins_),min(min_),max(max_) { + } + KOKKOS_INLINE_FUNCTION + void operator() (int i, unsigned int& count) const { + int ix1 = int ((keys(i,0)-min)/max * max_bins); + int iy1 = int ((keys(i,1)-min)/max * max_bins); + int iz1 = int ((keys(i,2)-min)/max * max_bins); + int ix2 = int ((keys(i+1,0)-min)/max * max_bins); + int iy2 = int ((keys(i+1,1)-min)/max * max_bins); + int iz2 = int ((keys(i+1,2)-min)/max * max_bins); + + if (ix1>ix2) count++; + else if(ix1==ix2) { + if (iy1>iy2) count++; + else if ((iy1==iy2) && (iz1>iz2)) count++; + } + } +}; + +template +struct sum3D { + typedef double value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + sum3D(Kokkos::View keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, double& count) const { + count+=keys(i,0); + count+=keys(i,1); + count+=keys(i,2); + } +}; + 
+template +void test_1D_sort(unsigned int n,bool force_kokkos) { + typedef Kokkos::View KeyViewType; + KeyViewType keys("Keys",n); + + Kokkos::Random_XorShift64_Pool g(1931); + Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(n,sum(keys),sum_before); + + Kokkos::sort(keys,force_kokkos); + + Kokkos::parallel_reduce(n,sum(keys),sum_after); + Kokkos::parallel_reduce(n-1,is_sorted_struct(keys),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; + + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +template +void test_3D_sort(unsigned int n) { + typedef Kokkos::View KeyViewType; + + KeyViewType keys("Keys",n*n*n); + + Kokkos::Random_XorShift64_Pool g(1931); + Kokkos::fill_random(keys,g,100.0); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(keys.dimension_0(),sum3D(keys),sum_before); + + int bin_1d = 1; + while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2; + int bin_max[3] = {bin_1d,bin_1d,bin_1d}; + typename KeyViewType::value_type min[3] = {0,0,0}; + typename KeyViewType::value_type max[3] = {100,100,100}; + + typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp; + BinOp bin_op(bin_max,min,max); + Kokkos::BinSort< KeyViewType , BinOp > + Sorter(keys,bin_op,false); + Sorter.create_permute_vector(); + Sorter.template sort< KeyViewType >(keys); + + Kokkos::parallel_reduce(keys.dimension_0(),sum3D(keys),sum_after); + Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct(keys,bin_1d,min[0],max[0]),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 
1 : 0; + + printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +template +void test_sort(unsigned int N) +{ + test_1D_sort(N*N*N, true); + test_1D_sort(N*N*N, false); + test_3D_sort(N); +} + +} +} +#endif /* TESTSORT_HPP_ */ diff --git a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp new file mode 100644 index 0000000000..a61d6c8bd5 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- + + +namespace Test { + +#ifdef KOKKOS_HAVE_PTHREAD +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + // * Kokkos::hwloc::get_available_threads_per_core() + ; + + } + + std::cout << "Threads: " << num_threads << std::endl; + + Kokkos::Threads::initialize( num_threads ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + +#define THREADS_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( threads, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( threads, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define THREADS_SORT_UNSIGNED( size ) \ + TEST_F( threads, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Threads, double >(size); \ + } + + +THREADS_RANDOM_XORSHIFT64( 10240000 ) +THREADS_RANDOM_XORSHIFT1024( 10130144 ) +THREADS_SORT_UNSIGNED(171) + 
+#undef THREADS_RANDOM_XORSHIFT64 +#undef THREADS_RANDOM_XORSHIFT1024 +#undef THREADS_SORT_UNSIGNED + +#endif +} // namespace Test + + diff --git a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp new file mode 100644 index 0000000000..f952ab3db5 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/cmake/Dependencies.cmake b/lib/kokkos/cmake/Dependencies.cmake new file mode 100644 index 0000000000..8c51eab4d7 --- /dev/null +++ b/lib/kokkos/cmake/Dependencies.cmake @@ -0,0 +1,10 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS + #SubPackageName Directory Class Req/Opt + # + # New Kokkos subpackages: + Core core PS REQUIRED + Containers containers PS OPTIONAL + Algorithms algorithms PS OPTIONAL + Example example EX OPTIONAL + ) diff --git a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake new file mode 100644 index 0000000000..aad1e2bad7 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake @@ -0,0 +1,75 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. 
Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. 
Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +# Check for CUDA support + +IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1") + MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)") +ELSE() + IF(CMAKE_VERSION VERSION_LESS "2.8.8") + # FindCUDA before CMake 2.8.8 does not find the cusparse library; therefore, we must locate it ourselves. + find_library(CUDA_cusparse_LIBRARY + cusparse + HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib + ) + IF(NOT CUDA_cusparse_LIBRARY) + MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cusparse library.") + ENDIF() + ENDIF(CMAKE_VERSION VERSION_LESS "2.8.8") + GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) + GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) +ENDIF() + diff --git a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake new file mode 100644 index 0000000000..715b3e9bde --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake @@ -0,0 +1,71 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An
Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. 
Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Hardware locality detection and control library. +# +# Acquisition information: +# Date checked: November 2011 +# Checked by: H. Carter Edwards +# Source: http://www.open-mpi.org/projects/hwloc/ +# Version: 1.3 +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC + REQUIRED_HEADERS hwloc.h + REQUIRED_LIBS_NAMES "hwloc" + ) + diff --git a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake new file mode 100644 index 0000000000..fc401d7543 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake @@ -0,0 +1,82 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. 
+# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +SET(USE_THREADS FALSE) + +IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES) + # Use CMake's Thread finder since it is a bit smarter in determining + # whether pthreads is already built into the compiler and doesn't need + # a library to link. + FIND_PACKAGE(Threads) + #If Threads found a copy of pthreads make sure it is one of the cases the tribits + #tpl system cannot handle. + IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread") + SET(USE_THREADS TRUE) + ENDIF() + ENDIF() +ENDIF() + +IF(USE_THREADS) + SET(TPL_Pthread_INCLUDE_DIRS "") + SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + SET(TPL_Pthread_LIBRARY_DIRS "") +ELSE() + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + REQUIRED_HEADERS pthread.h + REQUIRED_LIBS_NAMES pthread + ) +ENDIF() diff --git a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake new file mode 100644 index 0000000000..994b72b200 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. 
Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. 
Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Qthreads lightweight user-level threading library. +# +# Acquisition information: +# Date checked: July 2014 +# Checked by: H. Carter Edwards +# Source: https://code.google.com/p/qthreads +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD + REQUIRED_HEADERS qthread.h + REQUIRED_LIBS_NAMES "qthread" + ) + diff --git a/lib/kokkos/config/configure_compton_cpu.sh b/lib/kokkos/config/configure_compton_cpu.sh new file mode 100644 index 0000000000..17287fb848 --- /dev/null +++ b/lib/kokkos/config/configure_compton_cpu.sh @@ -0,0 +1,190 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. 
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build with Intel compiler + +INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + 
+#----------------------------------------------------------------------------- +# OpenMP configuration: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_compton_mic.sh b/lib/kokkos/config/configure_compton_mic.sh new file mode 100644 index 0000000000..7f9aee13f9 --- /dev/null +++ b/lib/kokkos/config/configure_compton_mic.sh @@ -0,0 +1,186 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. 
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build for MIC architecture: + +INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP 
configuration: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos.sh b/lib/kokkos/config/configure_kokkos.sh new file mode 100644 index 0000000000..592e7f5936 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos.sh @@ -0,0 +1,293 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. 
+ +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +#----------------------------------------------------------------------------- + +USE_CUDA_ARCH= +USE_THREAD= +USE_OPENMP= +USE_INTEL= +USE_XEON_PHI= +HWLOC_BASE_DIR= +MPI_BASE_DIR= +BLAS_LIB_DIR= +LAPACK_LIB_DIR= + +if [ 1 ] ; then + # Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu + USE_CUDA_ARCH="35" + USE_OPENMP=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" + MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" + BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" + LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" + +elif [ ] ; then + # Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu + USE_CUDA_ARCH="35" + USE_THREAD=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" + MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" + BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" + LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" + +elif [ ] ; then + # Platform 'kokkos-dev' with Xeon Phi and hwloc + USE_OPENMP=ON + USE_INTEL=ON + USE_XEON_PHI=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106" + +elif [ ] ; then + # Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu + USE_CUDA_ARCH="20" + USE_OPENMP=ON + HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + MPI_BASE_DIR="/home/sems/common/openmpi/current" + +elif [ ] ; then + # Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu + USE_CUDA_ARCH="20" + USE_THREAD=ON + HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + MPI_BASE_DIR="/home/sems/common/openmpi/current" + +fi + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure command line options: + +CMAKE_CONFIGURE="" +CMAKE_CXX_FLAGS="" + +#----------------------------------------------------------------------------- 
+# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- + +if [ 1 ] ; then + + # Configure for Tpetra/Kokkos: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode" + + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE" + + if [ -n "${USE_CUDA_ARCH}" ] ; then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON" + + fi + +fi + +if [ 1 ] ; then + + # Configure for Stokhos: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON" + +fi + +if [ 1 ] ; then + + # Configure for 
TrilinosCouplings: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON" + +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Debug_Bounds_Check:BOOL=ON" +fi + +#----------------------------------------------------------------------------- +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Kokkos use pthread configuration: + +if [ "${USE_THREAD}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Kokkos use OpenMP configuration: + +if [ "${USE_OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Kokkos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${USE_CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. + + CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}" + + if [ "${USE_OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${USE_INTEL}" = "ON" -o "${USE_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +# Cross-compile for Intel Xeon Phi: + +if [ "${USE_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- + +if [ -n "${CMAKE_CXX_FLAGS}" ] ; then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'" + +fi + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}" + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_bgq.sh b/lib/kokkos/config/configure_kokkos_bgq.sh new file mode 100755 index 0000000000..73236937ea --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_bgq.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# to build: +# build on bgq-b[1-12] +# module load sierra-devel +# run this configure file +# make + +# to run: +# ssh bgq-login +# cd /scratch/username/... +# export OMP_PROC_BIND and XLSMPOPTS environment variables +# run with srun + +# Note: hwloc does not work to get or set cpubindings on bgq. +# Use the openmp backend and the openmp environment variables. +# +# Only the mpi wrappers seem to be setup for cross-compile, +# so it is important that this configure enables MPI and uses mpigcc wrappers. + + + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="../Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. 
+ +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2" + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_dev.sh b/lib/kokkos/config/configure_kokkos_dev.sh new file mode 100755 index 0000000000..ac61dec602 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_dev.sh @@ -0,0 +1,216 @@ +#!/bin/sh +# +# Copy this script, put it outside the 
Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +#CMAKE_BUILD_TYPE=DEBUG +#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Debug_Bounds_Check:BOOL=ON" + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +#CUDA_ARCH="" +#CUDA_ARCH="20" +#CUDA_ARCH="30" +CUDA_ARCH="35" + +# Build with OpenMP + +OPENMP=ON +PTHREADS=ON + +# Build host code with Intel compiler: + +INTEL=OFF + +# Build for MIC architecture: + +INTEL_XEON_PHI=OFF + +# Build with HWLOC at location: + +#HWLOC_BASE_DIR="" +#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +# Location for MPI to use in examples: + +#MPI_BASE_DIR="" +#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" +MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3" +#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path 
can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +if [ "${PTHREADS}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +if [ "${OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. + + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + + if [ "${OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_nvidia.sh b/lib/kokkos/config/configure_kokkos_nvidia.sh new file mode 100644 index 0000000000..f78b7dce78 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_nvidia.sh @@ -0,0 +1,204 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, 
and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +# CUDA_ARCH="" +CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build with OpenMP + +OPENMP=ON + +# Build host code with Intel compiler: + +# INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="/home/sems/common/openmpi/current" + +#----------------------------------------------------------------------------- +# MPI configuation only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + 
+#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +if [ "${OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + + if [ "${OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_shannon.sh b/lib/kokkos/config/configure_shannon.sh new file mode 100644 index 0000000000..8bd175b031 --- /dev/null +++ b/lib/kokkos/config/configure_shannon.sh @@ -0,0 +1,190 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. 
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +# CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +CUDA_ARCH="35" + +# Build host code with Intel compiler: + +INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + 
+#----------------------------------------------------------------------------- +# OpenMP configuration: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh b/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh new file mode 100755 index 0000000000..0baa83aefe --- /dev/null +++ b/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# +# This script uses CUDA, OpenMP, and MPI. +# +# Before invoking this script, set the OMPI_CXX environment variable +# to point to nvcc_wrapper, wherever it happens to live. (If you use +# an MPI implementation other than OpenMPI, set the corresponding +# environment variable instead.) 
+# + +rm -f CMakeCache.txt; +rm -rf CMakeFiles +EXTRA_ARGS=$@ +MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5" +CUDA_PATH="/opt/nvidia/cuda/6.5.14" + +# +# As long as there are any .cu files in Trilinos, we'll need to set +# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and +# lets nvcc_wrapper handle them as .cpp files, then we won't need to +# set CUDA_NVCC_FLAGS. As it is, given that we need to set +# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as +# nvcc_wrapper passes to nvcc. +# +CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include" +CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" +CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM" + +cmake \ + -D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \ + -D CMAKE_BUILD_TYPE:STRING=DEBUG \ + -D CMAKE_CXX_FLAGS:STRING="-g -Wall" \ + -D CMAKE_C_FLAGS:STRING="-g -Wall" \ + -D CMAKE_FORTRAN_FLAGS:STRING="" \ + -D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \ + -D Trilinos_ENABLE_Triutils=OFF \ + -D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \ + -D Trilinos_ENABLE_DEBUG:BOOL=OFF \ + -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \ + -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \ + -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \ + -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \ + -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \ + -D BUILD_SHARED_LIBS:BOOL=OFF \ + -D DART_TESTING_TIMEOUT:STRING=600 \ + -D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \ + \ + \ + -D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ + -D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ + -D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ + -D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ + -D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \ + -D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \ + -D 
MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \ + \ + \ + -D Trilinos_ENABLE_CXX11:BOOL=OFF \ + -D TPL_ENABLE_MPI:BOOL=ON \ + -D Trilinos_ENABLE_OpenMP:BOOL=ON \ + -D Trilinos_ENABLE_ThreadPool:BOOL=ON \ + \ + \ + -D TPL_ENABLE_CUDA:BOOL=ON \ + -D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \ + -D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \ + -D TPL_ENABLE_Thrust:BOOL=OFF \ + -D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \ + -D TPL_ENABLE_CUSPARSE:BOOL=OFF \ + -D TPL_ENABLE_Cusp:BOOL=OFF \ + -D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \ + -D CUDA_VERBOSE_BUILD:BOOL=OFF \ + -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS} \ + \ + \ + -D TPL_ENABLE_HWLOC=OFF \ + -D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \ + -D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \ + -D TPL_ENABLE_BinUtils=OFF \ + -D TPL_ENABLE_BLAS:STRING=ON \ + -D TPL_ENABLE_LAPACK:STRING=ON \ + -D TPL_ENABLE_MKL:STRING=OFF \ + -D TPL_ENABLE_HWLOC:STRING=OFF \ + -D TPL_ENABLE_GTEST:STRING=ON \ + -D TPL_ENABLE_SuperLU=ON \ + -D TPL_ENABLE_BLAS=ON \ + -D TPL_ENABLE_LAPACK=ON \ + -D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \ + -D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \ + \ + \ + -D Trilinos_Enable_Kokkos:BOOL=ON \ + -D Trilinos_ENABLE_KokkosCore:BOOL=ON \ + -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \ + -D Trilinos_ENABLE_KokkosContainers:BOOL=ON \ + -D Trilinos_ENABLE_TpetraKernels:BOOL=ON \ + -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \ + -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \ + -D Trilinos_ENABLE_KokkosExample:BOOL=ON \ + -D Kokkos_ENABLE_EXAMPLES:BOOL=ON \ + -D Kokkos_ENABLE_TESTS:BOOL=OFF \ + -D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \ + -D TpetraClassic_ENABLE_OpenMPNode=OFF \ + -D TpetraClassic_ENABLE_TPINode=OFF \ + -D 
TpetraClassic_ENABLE_MKL=OFF \ + -D Kokkos_ENABLE_Cuda_UVM=ON \ + \ + \ + -D Trilinos_ENABLE_Teuchos:BOOL=ON \ + -D Teuchos_ENABLE_COMPLEX:BOOL=OFF \ + \ + \ + -D Trilinos_ENABLE_Tpetra:BOOL=ON \ + -D Tpetra_ENABLE_KokkosCore=ON \ + -D Tpetra_ENABLE_Kokkos_DistObject=OFF \ + -D Tpetra_ENABLE_Kokkos_Refactor=ON \ + -D Tpetra_ENABLE_TESTS=ON \ + -D Tpetra_ENABLE_EXAMPLES=ON \ + -D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \ + \ + \ + -D Trilinos_ENABLE_Belos=OFF \ + -D Trilinos_ENABLE_Amesos=OFF \ + -D Trilinos_ENABLE_Amesos2=OFF \ + -D Trilinos_ENABLE_Ifpack=OFF \ + -D Trilinos_ENABLE_Ifpack2=OFF \ + -D Trilinos_ENABLE_Epetra=OFF \ + -D Trilinos_ENABLE_EpetraExt=OFF \ + -D Trilinos_ENABLE_Zoltan=OFF \ + -D Trilinos_ENABLE_Zoltan2=OFF \ + -D Trilinos_ENABLE_MueLu=OFF \ + -D Belos_ENABLE_TESTS=ON \ + -D Belos_ENABLE_EXAMPLES=ON \ + -D MueLu_ENABLE_TESTS=ON \ + -D MueLu_ENABLE_EXAMPLES=ON \ + -D Ifpack2_ENABLE_TESTS=ON \ + -D Ifpack2_ENABLE_EXAMPLES=ON \ + $EXTRA_ARGS \ +${HOME}/Trilinos + diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh new file mode 100755 index 0000000000..fa588c778f --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-all.sh @@ -0,0 +1,113 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda, OpenMP, Threads, Qthread, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# Pthread + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# Qthread + +QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# C++11 + +# 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh new file mode 100755 index 0000000000..c2e17bb944 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh @@ -0,0 +1,104 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda, OpenMP, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + 
+#----------------------------------------------------------------------------- +# Pthread explicitly OFF so tribits doesn't automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh new file mode 100755 index 0000000000..39b72d5ce1 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +# Pthread explicitly OFF, otherwise tribits will automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# Configure 
for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh new file mode 100755 index 0000000000..b83a535416 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# C++11, OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} 
-D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF so tribits doesn't automatically activate + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh b/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh new file mode 100755 index 0000000000..d2e06a4ebd --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh @@ -0,0 +1,78 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Debug, bounds checking +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized; Kokkos_ENABLE_Debug_Bounds_Check is the bounds-check option declared in Kokkos' CMakeLists.txt + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Debug_Bounds_Check:BOOL=ON" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D
Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh new file mode 100755 index 0000000000..e2ab1f1c00 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh @@ -0,0 +1,89 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Intel, OpenMP, Cuda +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh new file mode 100755 index 0000000000..fd56d41161 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Intel, OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 intel/13.SP1.1.106 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-omp.sh new file mode 100755 index 0000000000..f91ecd5254 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-omp.sh @@ -0,0 +1,77 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +# Pthread explicitly OFF, otherwise tribits will automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh new file mode 100755 index 0000000000..19ab969023 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh @@ -0,0 +1,87 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Threads, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} 
-D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# Pthread + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper new file mode 100755 index 0000000000..d583866191 --- /dev/null +++ b/lib/kokkos/config/nvcc_wrapper @@ -0,0 +1,264 @@ +#!/bin/bash +# +# This shell script (nvcc_wrapper) wraps both the host compiler and +# NVCC, if you are building Trilinos with CUDA enabled. The script +# remedies some differences between the interface of NVCC and that of +# the host compiler, in particular for linking. It also means that +# Trilinos doesn't need separate .cu files; it can just use .cpp +# files. 
+# +# Hopefully, at some point, NVIDIA may fix NVCC so as to make this +# script obsolete. For now, this script exists and if you want to +# build Trilinos with CUDA enabled, you must use this script as your +# compiler. + +# Default settings: change those according to your machine. For +# example, you may have two different wrappers with either icpc +# or g++ as their back-end compiler. The defaults can be overridden +# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). + +default_arch="sm_35" +#default_arch="sm_50" + +# +# The default C++ compiler. +# +host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"} +#host_compiler="icpc" +#host_compiler="/usr/local/gcc/4.8.3/bin/g++" +#host_compiler="/usr/local/gcc/4.9.1/bin/g++" + +# +# Internal variables +# + +# C++ files +cpp_files="" + +# Host compiler arguments +xcompiler_args="" + +# Cuda (NVCC) only arguments +cuda_args="" + +# Arguments for both NVCC and Host compiler +shared_args="" + +# Linker arguments handed to NVCC (each one prefixed with -Xlinker) +xlinker_args="" + +# Linker arguments for the host-only command; start empty so that a +# variable of the same name in the environment cannot leak into it +host_linker_args="" + +# Object files passable to NVCC +object_files="" + +# Link objects for the host linker only +object_files_xlinker="" + +# Does the User set the architecture +arch_set=0 + +# Does the user overwrite the host compiler +ccbin_set=0 + +#Error code of compilation +error_code=0 + +# Do a dry run without actually compiling +dry_run=0 + +# Skip NVCC compilation and use host compiler directly +host_only=0 + +# Enable workaround for CUDA 6.5 for pragma ident +replace_pragma_ident=0 + +# Mark first host compiler argument +first_xcompiler_arg=1 + +temp_dir=${TMPDIR:-/tmp} + +#echo "Arguments: $# $@" + +while [ $# -gt 0 ] +do + case $1 in + #show the executed command + --show|--nvcc-wrapper-show) + dry_run=1 + ;; + #run host compilation only + --host-only) + host_only=1 + ;; + #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros + --replace-pragma-ident) + replace_pragma_ident=1 + ;; +
#handle source files to be compiled as cuda files + *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) + cpp_files="$cpp_files $1" + ;; + #Handle shared args (valid for both nvcc and the host compiler) + -O*|-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + shared_args="$shared_args $1" + ;; + #Handle shared args that have an argument (the following word is consumed as well) + -o|-MT) + shared_args="$shared_args $1 $2" + shift + ;; + #Handle known nvcc args + -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage) + cuda_args="$cuda_args $1" + ;; + #Handle known nvcc args that have an argument (the following word is consumed as well) + -rdc|-maxrregcount|--default-stream) + cuda_args="$cuda_args $1 $2" + shift + ;; + #Handle c++11 setting + --std=c++11|-std=c++11) + shared_args="$shared_args $1" + ;; + #strip off -std=c++98 due to nvcc warnings; Tribits will place both -std=c++11 and -std=c++98 + -std=c++98|--std=c++98) + ;; + #strip off pedantic because it produces endless warnings about #LINE added by the preprocessor + -pedantic|-Wpedantic|-ansi) + ;; + #strip -Xcompiler because we add it; its value joins the comma separated host compiler list + -Xcompiler) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$2" + fi + shift + ;; + #strip off "-x cu" because we add that; any other "-x <lang>" is forwarded to the host compiler + -x) + if [[ $2 != "cu" ]]; then + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="-x,$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,-x,$2" + fi + fi + shift + ;; + #Handle -ccbin (if it's not set we can set it to a default value) + -ccbin) + cuda_args="$cuda_args $1 $2" + ccbin_set=1 + host_compiler=$2 + shift + ;; + #Handle -arch argument (if it's not set use a default) + -arch*) + cuda_args="$cuda_args $1" + arch_set=1 + ;; + #Handle -Xcudafe argument + -Xcudafe) + cuda_args="$cuda_args -Xcudafe $2" + shift + ;; + #Handle args that should be sent to the linker: the first four characters ("-Wl,") are dropped, nvcc gets the rest behind -Xlinker + -Wl*) + xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" + host_linker_args="$host_linker_args
${1:4:${#1}}" + ;; + #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking + *.a|*.so|*.o|*.obj) + object_files="$object_files $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking + *.so.*|*.dylib) + object_files="$object_files -Xlinker $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #All other args are sent to the host compiler + *) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args=$1 + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$1" + fi + ;; + esac + + shift +done + +#Add default host compiler if necessary +if [ $ccbin_set -ne 1 ]; then + cuda_args="$cuda_args -ccbin $host_compiler" +fi + +#Add architecture command +if [ $arch_set -ne 1 ]; then + cuda_args="$cuda_args -arch=$default_arch" +fi + +#Compose compilation command +nvcc_command="nvcc $cuda_args $shared_args $xlinker_args" +if [ $first_xcompiler_arg -eq 0 ]; then + nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" +fi + +#Compose host only command: xcompiler_args is comma separated for -Xcompiler, +#the host compiler itself needs the arguments as separate words +host_command="$host_compiler $shared_args ${xcompiler_args//,/ } $host_linker_args" + +#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' +if [ $replace_pragma_ident -eq 1 ]; then + cpp_files2="" + for file in $cpp_files + do + var=`grep pragma ${file} | grep ident | grep "#"` + if [ "${#var}" -gt 0 ] + then + #name the patched copy after the base name only: $file may carry directories that do not exist below $temp_dir + tmp_file="$temp_dir/nvcc_wrapper_tmp_${file##*/}" + sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > "$tmp_file" + cpp_files2="$cpp_files2 $tmp_file" + else + cpp_files2="$cpp_files2 $file" + fi + done + cpp_files=$cpp_files2 + #echo $cpp_files +fi + +if [ "$cpp_files" ]; then + nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" +else + nvcc_command="$nvcc_command $object_files" +fi + +if [ "$cpp_files" ]; then + host_command="$host_command
$object_files $cpp_files" +else + host_command="$host_command $object_files" +fi + +#Print command for dryrun +if [ $dry_run -eq 1 ]; then + if [ $host_only -eq 1 ]; then + echo $host_command + else + echo $nvcc_command + fi + exit 0 +fi + +#Run compilation command +if [ $host_only -eq 1 ]; then + $host_command +else + $nvcc_command +fi +error_code=$? + +#Report error code +exit $error_code diff --git a/lib/kokkos/config/snapshot.py b/lib/kokkos/config/snapshot.py new file mode 100755 index 0000000000..d816cd0c9c --- /dev/null +++ b/lib/kokkos/config/snapshot.py @@ -0,0 +1,279 @@ +#! /usr/bin/env python + +""" +Snapshot a project into another project and perform the necessary repo actions +to provide a commit message that can be used to trace back to the exact point +in the source repository. +""" + +#todo: +# Support svn +# Allow renaming of the source dir in the destination path +# Check if a new snapshot is necessary? +# + +import sys + +#check the version number so that there is a good error message when argparse is not available. +#This checks for exactly 2.7 which is bad, but it is a python 2 script and argparse was introduced +#in 2.7 which is also the last version of python 2. If this script is updated for python 3 this +#will need to change, but for now it is not safe to allow 3.x to run this. +if sys.version_info[:2] != (2, 7): + print "Error snapshot requires python 2.7 detected version is %d.%d." 
% (sys.version_info[0], sys.version_info[1]) + sys.exit(1) + +import subprocess, argparse, re, doctest, os, datetime, traceback + +def parse_cmdline(description): + """Build the argument parser, parse the command line and return the validated options.""" + parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description) + + #"--no-comit" is the historical (misspelled) name and stays accepted; "--no-commit" is the spelling users expect. + #NOTE: with two long names, abbreviations shorter than "--no-comi" no longer identify a single option string. + parser.add_argument("-n", "--no-commit", "--no-comit", action="store_false", dest="create_commit", default=True, + help="Do not perform a commit or create a commit message.") + parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False, + help="Enable verbose mode.") + parser.add_argument("-d", "--debug", action="store_true", dest="debug_mode", default=False, + help="Enable debugging output.") + parser.add_argument("--no-validate-repo", action="store_true", dest="no_validate_repo", default=False, + help="Reduce the validation that the source and destination repos are clean to a warning.") + parser.add_argument("--source-repo", choices=["git","none"], default="", + help="Type of repository of the source, use none to skip all repository operations.") + parser.add_argument("--dest-repo", choices=["git","none"], default="", + help="Type of repository of the destination, use none to skip all repository operations.") + + parser.add_argument("source", help="Source project to snapshot from.") + parser.add_argument("destination", help="Destination to snapshot too.") + + options = parser.parse_args() + options = validate_options(options) + return options +#end parseCmdline + +def validate_options(options): + """Normalise the source/destination paths, detect their repository types and reconcile them with --source-repo/--dest-repo.""" + apparent_source_repo_type="none" + apparent_dest_repo_type="none" + + #prevent user from accidentally giving us a path that rsync will treat differently than expected.
+ options.source = options.source.rstrip(os.sep) + options.destination = options.destination.rstrip(os.sep) + + options.source = os.path.abspath(options.source) + options.destination = os.path.abspath(options.destination) + + #the source must already exist; a missing destination is created further down + if os.path.exists(options.source): + apparent_source_repo_type, source_root = deterimine_repo_type(options.source) + else: + raise RuntimeError("Could not find source directory of %s." % options.source) + options.source_root = source_root + + if not os.path.exists(options.destination): + print "Could not find destination directory of %s so it will be created." % options.destination + os.makedirs(options.destination) + + apparent_dest_repo_type, dest_root = deterimine_repo_type(options.destination) + options.dest_root = dest_root + + #error on svn repo types for now + if apparent_source_repo_type == "svn" or apparent_dest_repo_type == "svn": + raise RuntimeError("SVN repositories are not supported at this time.") + + #an explicit type of "none" is always accepted; any other explicit type must agree with the detected one + if options.source_repo == "": + #source repo type is not specified, so just use the apparent type. + options.source_repo = apparent_source_repo_type + else: + if options.source_repo != "none" and options.source_repo != apparent_source_repo_type: + raise RuntimeError("Specified source repository type of %s conflicts with determined type of %s" % \ + (options.source_repo, apparent_source_repo_type)) + + if options.dest_repo == "": + #destination repo type is not specified, so just use the apparent type. + options.dest_repo = apparent_dest_repo_type + else: + if options.dest_repo != "none" and options.dest_repo != apparent_dest_repo_type: + raise RuntimeError("Specified destination repository type of %s conflicts with determined type of %s" % \ + (options.dest_repo, apparent_dest_repo_type)) + + return options +#end validate_options + +def run_cmd(cmd, options, working_dir="."): + """Run cmd (an argument list) in working_dir and return (stdout, stderr); a non-zero exit status raises RuntimeError.""" + cmd_str = " ".join(cmd) + if options.verbose_mode: + print "Running command '%s' in dir %s."
% (cmd_str, working_dir) + + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir) + proc_stdout, proc_stderr = proc.communicate() + ret_val = proc.wait() + + if options.debug_mode: + print "==== %s stdout start ====" % cmd_str + print proc_stdout + print "==== %s stdout end ====" % cmd_str + print "==== %s stderr start ====" % cmd_str + print proc_stderr + print "==== %s stderr end ====" % cmd_str + + #a non-zero exit status is fatal; both captured streams go into the error text + if ret_val != 0: + raise RuntimeError("Command '%s' failed with error code %d. Error message:%s%s%sstdout:%s" % \ + (cmd_str, ret_val, os.linesep, proc_stderr, os.linesep, proc_stdout)) + + return proc_stdout, proc_stderr +#end run_cmd + +def deterimine_repo_type(location): + """Walk from location towards the filesystem root looking for a .git or .svn entry. + + Returns (repo_type, root): repo_type is "git", "svn" or "none"; root is the directory + holding the marker, or the empty string when no marker was found. + """ + apparent_repo_type = "none" + + while location != "": + if os.path.exists(os.path.join(location, ".git")): + apparent_repo_type = "git" + break + elif os.path.exists(os.path.join(location, ".svn")): + apparent_repo_type = "svn" + break + else: + #drop the last path component and try the parent + location = location[:location.rfind(os.sep)] + + return apparent_repo_type, location + +#end deterimine_repo_type + +def rsync(source, dest, options): + """Mirror source into dest with rsync -ar --delete; .git is excluded when the source is a git repository.""" + rsync_cmd = ["rsync", "-ar", "--delete"] + if options.debug_mode: + rsync_cmd.append("-v") + + if options.source_repo == "git": + rsync_cmd.append("--exclude=.git") + + #honour the arguments instead of re-reading the same paths from options + rsync_cmd.append(source) + rsync_cmd.append(dest) + run_cmd(rsync_cmd, options) +#end rsync + +def create_commit_message(commit_id, commit_log, project_name, project_location): + """Assemble the snapshot commit message from the commit id, its log text and the project name and location.""" + eol = os.linesep + message = "Snapshot of %s from commit %s" % (project_name, commit_id) + message += eol * 2 + message += "From repository at %s" % project_location + message += eol * 2 + message += "At commit:" + eol + message += commit_log + return message +#end create_commit_message + +def find_git_commit_information(options): + r""" + >>> class fake_options: + ... source="." + ... verbose_mode=False + ...
def do_git_commit(message, options):
  """Stage everything in the destination checkout and commit it.

  Runs `git add -A` followed by `git commit` with `message` inside
  options.destination, then prints the abbreviated hash of the new commit
  together with options.dest_root.
  """
  dest = options.destination

  if options.verbose_mode:
    print("Commiting to destination repository.")

  # Stage first, then commit; both run through the shared run_cmd helper.
  for git_args in (["add", "-A"], ["commit", "-m%s" % message]):
    run_cmd(["git"] + git_args, options, dest)

  # The first element of run_cmd's result is the captured stdout.
  short_sha = run_cmd(["git", "log", "--format=%h", "-1"], options, dest)[0]

  print("Commit %s was made to %s." % (short_sha.strip(), options.dest_root))
#end do_git_commit
def main(options):
  """Snapshot options.source into options.destination.

  Steps, in order:
    1. Collect provenance for the commit message (from git when the source
       is a git checkout, otherwise a timestamped placeholder).
    2. Check that the git checkouts involved are clean.
    3. rsync the source over the destination.
    4. Commit the result when the destination is a git checkout, otherwise
       write the message to snapshot_message.txt in the current directory.

  Raises:
    RuntimeError: if options.source_repo or options.dest_repo is neither
      "git" nor "none", or if any helper fails.
  """
  # Reject unknown repository types before touching anything. Without this
  # an unexpected source type left commit_id unbound (NameError) and an
  # unexpected destination type copied files but recorded no message.
  supported_repo_types = ("git", "none")
  if options.source_repo not in supported_repo_types:
    raise RuntimeError("Unsupported source repository type of %s" % options.source_repo)
  if options.dest_repo not in supported_repo_types:
    raise RuntimeError("Unsupported destination repository type of %s" % options.dest_repo)

  if options.verbose_mode:
    print("Snapshotting %s to %s." % (options.source, options.destination))

  if options.source_repo == "git":
    verify_git_repo_clean(options.source, options)
    commit_id, commit_log, repo_name, repo_location = find_git_commit_information(options)
  else:
    commit_id     = "N/A"
    commit_log    = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now())
    repo_name     = options.source
    repo_location = options.source

  commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2

  if options.dest_repo == "git":
    verify_git_repo_clean(options.destination, options)

  rsync(options.source, options.destination, options)

  if options.dest_repo == "git":
    do_git_commit(commit_message, options)
  else:
    file_name = "snapshot_message.txt"
    # The context manager closes the file even if the write fails.
    with open(file_name, "w") as message_file:
      message_file.write(commit_message)
    print("No commit done by request. Please use file at:")
    print("%s%sif you wish to commit this to a repo later." % (os.path.join(os.getcwd(), file_name), os.linesep))
#end main
&& pwd ) + +# +# Handle arguments +# + +DEBUG=False +ARGS="" +CUSTOM_BUILD_LIST="" +DRYRUN=False +BUILD_ONLY=False +declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3 +TEST_SCRIPT=False + +while [[ $# > 0 ]] +do +key="$1" +case $key in +--kokkos-path*) +KOKKOS_PATH="${key#*=}" +;; +--build-list*) +CUSTOM_BUILD_LIST="${key#*=}" +;; +--debug*) +DEBUG=True +;; +--build-only*) +BUILD_ONLY=True +;; +--test-script*) +TEST_SCRIPT=True +;; +--num*) +NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" +;; +--dry-run*) +DRYRUN=True +;; +--help) +echo "test_all_sandia :" +echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" +echo " Defaults to root repo containing this script" +echo "--debug: Run tests in debug. Defaults to False" +echo "--test-script: Test this script, not Kokkos" +echo "--num=N: Number of jobs to run in parallel " +echo "--dry-run: Just print what would be executed" +echo "--build-only: Just do builds, don't run anything" +echo "--build-list=BUILD,BUILD,BUILD..." +echo " Provide a comma-separated list of builds instead of running all builds" +echo " Valid items:" +echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" +echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" +echo "" +echo "ARGS: list of expressions matching compilers to test" +echo " supported compilers" +echo " gcc/4.7.2" +echo " gcc/4.8.4" +echo " gcc/4.9.2" +echo " gcc/5.1.0" +echo " intel/14.0.4" +echo " intel/15.0.2" +echo " intel/16.0.1" +echo " clang/3.5.2" +echo " clang/3.6.1" +echo " cuda/6.5.14" +echo " cuda/7.0.28" +echo " cuda/7.5.18" +echo "" +echo "Examples:" +echo " Run all tests" +echo " % test_all_sandia" +echo "" +echo " Run all gcc tests" +echo " % test_all_sandia gcc" +echo "" +echo " Run all gcc/4.7.2 and all intel tests" +echo " % test_all_sandia gcc/4.7.2 intel" +echo "" +echo " Run all tests in debug" +echo " % test_all_sandia --debug" +echo "" +echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" +echo " % test_all_sandia gcc/4.7.2 
--build-list=OpenMP,OpenMP_Serial" +echo "" +echo "If you want to kill the tests, do:" +echo " hit ctrl-z" +echo " % kill -9 %1" +echo +exit 0 +;; +*) +# args, just append +ARGS="$ARGS $1" +;; +esac +shift +done + +# set kokkos path +if [ -z "$KOKKOS_PATH" ]; then + KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT +else + # Ensure KOKKOS_PATH is abs path + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) +fi + +# set build type +if [ "$DEBUG" = "True" ]; then + BUILD_TYPE=debug +else + BUILD_TYPE=release +fi + +# If no args provided, do all compilers +if [ -z "$ARGS" ]; then + ARGS='?' +fi + +# Format: (compiler module-list build-list exe-name warning-flag) +COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + +# Process args to figure out which compilers to test +COMPILERS_TO_TEST="" +for ARG in $ARGS; do + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + if [[ "$COMPILER" = $ARG* ]]; then + if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then + COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER" + else + echo 
"Tried to add $COMPILER twice" + fi + fi + done +done + +# +# Functions +# + +# get_compiler_name +get_compiler_name() { + echo $1 | cut -d/ -f1 +} + +# get_compiler_version +get_compiler_version() { + echo $1 | cut -d/ -f2 +} + +# Do not call directly +get_compiler_data() { + local compiler=$1 + local item=$2 + local compiler_name=$(get_compiler_name $compiler) + local compiler_vers=$(get_compiler_version $compiler) + + local compiler_data + for compiler_data in "${COMPILERS[@]}" ; do + local arr=($compiler_data) + if [ "$compiler" = "${arr[0]}" ]; then + echo "${arr[$item]}" | tr , ' ' | sed -e "s//$compiler_name/g" -e "s//$compiler_vers/g" + return 0 + fi + done + + # Not found + echo "Unreconized compiler $compiler" >&2 + exit 1 +} + +# +# For all getters, usage: +# + +get_compiler_modules() { + get_compiler_data $1 1 +} + +get_compiler_build_list() { + get_compiler_data $1 2 +} + +get_compiler_exe_name() { + get_compiler_data $1 3 +} + +get_compiler_warning_flags() { + get_compiler_data $1 4 +} + +run_cmd() { + echo "RUNNING: $*" + if [ "$DRYRUN" != "True" ]; then + eval "$* 2>&1" + fi +} + +# report_and_log_test_results +report_and_log_test_result() { + # Use sane var names + local success=$1; local desc=$2; local phase=$3; + + if [ "$success" = "0" ]; then + echo " PASSED $desc" + touch $PASSED_DIR/$desc + else + echo " FAILED $desc" >&2 + echo $phase > $FAILED_DIR/$desc + cat ${desc}.${phase}.log + fi +} + +setup_env() { + local compiler=$1 + local compiler_modules=$(get_compiler_modules $compiler) + + module purge + + local mod + for mod in $compiler_modules; do + echo "Loading module $mod" + module load $mod 2>&1 + # It is ridiculously hard to check for the success of a loaded + # module. Module does not return error codes and piping to grep + # causes module to run in a subshell. 
+ module list 2>&1 | grep "$mod" >& /dev/null || return 1 + done + + return 0 +} + +# single_build_and_test +single_build_and_test() { + # Use sane var names + local compiler=$1; local build=$2; local build_type=$3; + + # set up env + mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" + cd $ROOT_DIR/$compiler/"${build}-$build_type" + local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') + setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + + # Set up flags + local compiler_warning_flags=$(get_compiler_warning_flags $compiler) + local compiler_exe=$(get_compiler_exe_name $compiler) + + if [[ "$build_type" = hwloc* ]]; then + local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) + fi + + if [[ "$build_type" = *debug* ]]; then + local extra_args="$extra_args --debug" + local cxxflags="-g $compiler_warning_flags" + else + local cxxflags="-O3 $compiler_warning_flags" + fi + + if [[ "$compiler" == cuda* ]]; then + cxxflags="--keep --keep-dir=$(pwd) $cxxflags" + export TMPDIR=$(pwd) + fi + + # cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags" + + echo " Starting job $desc" + + if [ "$TEST_SCRIPT" = "True" ]; then + local rand=$[ 1 + $[ RANDOM % 10 ]] + sleep $rand + if [ $rand -gt 5 ]; then + run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } + fi + else + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + if [[ "$BUILD_ONLY" == False ]]; then + run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } + fi + fi + + report_and_log_test_result 0 $desc + + return 0 +} + +# wait_for_jobs 
+wait_for_jobs() { + local -i max_jobs=$1 + local -i num_active_jobs=$(jobs | wc -l) + while [ $num_active_jobs -ge $max_jobs ] + do + sleep 1 + num_active_jobs=$(jobs | wc -l) + jobs >& /dev/null + done +} + +# run_in_background +run_in_background() { + local compiler=$1 + + local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL + if [[ "$BUILD_ONLY" == True ]]; then + num_jobs=8 + else + if [[ "$compiler" == cuda* ]]; then + num_jobs=1 + fi + fi + wait_for_jobs $num_jobs + + single_build_and_test $* & +} + +# build_and_test_all +build_and_test_all() { + # Get compiler data + local compiler=$1 + if [ -z "$CUSTOM_BUILD_LIST" ]; then + local compiler_build_list=$(get_compiler_build_list $compiler) + else + local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ') + fi + + # do builds + local build + for build in $compiler_build_list + do + run_in_background $compiler $build $BUILD_TYPE + + # If not cuda, do a hwloc test too + if [[ "$compiler" != cuda* ]]; then + run_in_background $compiler $build "hwloc-$BUILD_TYPE" + fi + done + + return 0 +} + +get_test_root_dir() { + local existing_results=$(find . 
# wait_summarize_and_exit
# Wait for all background jobs, print the names recorded in $PASSED_DIR and
# $FAILED_DIR, then exit with the number of failed tests (capped at 255).
wait_summarize_and_exit() {
  wait_for_jobs 1

  echo "#######################################################"
  echo "PASSED TESTS"
  echo "#######################################################"

  \ls -1 "$PASSED_DIR" | sort

  echo "#######################################################"
  echo "FAILED TESTS"
  echo "#######################################################"

  local failed_test
  local -i rv=0
  for failed_test in $(\ls -1 "$FAILED_DIR")
  do
    # Each file in FAILED_DIR holds the name of the phase that failed.
    echo $failed_test "("$(cat "$FAILED_DIR/$failed_test")" failed)"
    rv=$((rv + 1))
  done

  # Exit statuses are reported modulo 256, so an uncapped count of exactly
  # 256 failures would look like success to the caller.
  if [ $rv -gt 255 ]; then
    rv=255
  fi

  exit $rv
}
#!/bin/bash -x

# Jenkins multi-configuration driver: configure, build and test Kokkos for
# the combination given by the BUILD_TYPE, HOST_COMPILER and WORKSPACE
# environment variables.

echo "Building for BUILD_TYPE = ${BUILD_TYPE}"
echo "Building with HOST_COMPILER = ${HOST_COMPILER}"
echo "Building in ${WORKSPACE}"

module use /home/projects/modulefiles

# BUILD_TYPE arrives "~"-separated; every item becomes a --with-<item> flag.
BUILD_TYPE=`echo $BUILD_TYPE | tr "~" " "`
build_options=""
for item in ${BUILD_TYPE}; do
  build_options="$build_options --with-$item"
done

kokkos_path=${WORKSPACE}/kokkos
gtest_path=${WORKSPACE}/kokkos/tpls/gtest

echo ${WORKSPACE}
pwd

#extract information from the provided parameters.
host_compiler_brand=`echo $HOST_COMPILER | grep -o "^[a-zA-Z]*"`
cuda_compiler=`echo $BUILD_TYPE | grep -o "cuda_[^ ]*"`

host_compiler_module=`echo $HOST_COMPILER | tr "_" "/"`
cuda_compiler_module=`echo $cuda_compiler | tr "_" "/"`
build_path=`echo $BUILD_TYPE | tr " " "_"`

module load $host_compiler_module
# Only load a cuda module when the build type actually names one.
if [ -n "$cuda_compiler_module" ]; then
  module load $cuda_compiler_module
fi

case $host_compiler_brand in
  gcc)
    module load nvcc-wrapper/gnu
    compiler=g++
    ;;
  intel)
    module load nvcc-wrapper/intel
    compiler=icpc
    ;;
  *)
    echo "Unrecognized compiler brand."
    exit 1
    ;;
esac

#if cuda is on we need to set the host compiler for the
#nvcc wrapper and make the wrapper the compiler.
# Quoted -n test: an unquoted empty variable inside [ ... != "" ] is a
# syntax error for test rather than a clean "false".
if [ -n "$cuda_compiler" ]; then
  export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler
  compiler=$kokkos_path/config/nvcc_wrapper
fi

if [ "$host_compiler_brand" == "intel" ] && [ -n "$cuda_compiler" ]; then
  echo "Intel compilers are not supported with cuda at this time."
  exit 0
fi

rm -rf "test-$build_path"
mkdir "test-$build_path"
cd "test-$build_path" || exit 1

/bin/bash $kokkos_path/generate_makefile.bash $build_options --kokkos-path="$kokkos_path" --with-gtest="$gtest_path" --compiler=$compiler 2>&1 |tee configure.out

if [ ${PIPESTATUS[0]} != 0 ]; then
  echo "Configure failed."
  exit 1
fi

make build-test 2>&1 | tee build.log

if [ ${PIPESTATUS[0]} != 0 ]; then
  echo "Build failed."
  exit 1
fi

make test 2>&1 | tee test.log
# Capture make's own status immediately; a test binary that crashes without
# printing "FAIL" must still fail the job.
test_status=${PIPESTATUS[0]}

grep "FAIL" test.log
if [ $? == 0 ] || [ "$test_status" != 0 ]; then
  echo "Tests failed."
  exit 1
fi
###############################################################################
def parse_command_line(args, description):
###############################################################################
    """
    Parse the command line of this script.

    args is the full argv-style list (args[0] is the program name) and
    description is the text shown at the top of --help. Sets the module
    level VERBOSE flag when -v/--verbose is given.

    Returns the tuple
    (cppfile, buildcmd, execmd, start, end, repeat, template, csv).
    """
    global VERBOSE

    prog = os.path.basename(args[0])

    # The synopsis names the three required positionals so that --help and
    # argument errors show how the script is actually invoked.
    parser = argparse.ArgumentParser(
        usage="""\n%s <cppfile> <buildcmd> <execmd> [--verbose]
OR
%s --help
OR
%s --test

\033[1mEXAMPLES:\033[0m
    > %s foo.cpp 'make -j4' foo
""" % ((prog, ) * 4),
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("cppfile", help="Name of file to modify.")

    parser.add_argument("buildcmd", help="Build command")

    parser.add_argument("execmd", help="Run command")

    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Print extra information")

    parser.add_argument("-s", "--start", type=int, default=1,
                        help="Starting number of dupes")

    parser.add_argument("-e", "--end", type=int, default=1000,
                        help="Ending number of dupes")

    parser.add_argument("-n", "--repeat", type=int, default=10,
                        help="Number of times to repeat an individual execution. Best value will be taken.")

    parser.add_argument("-t", "--template", action="store_true",
                        help="Use templating instead of source copying to increase object size")

    parser.add_argument("-c", "--csv", action="store_true",
                        help="Print results as CSV")

    parsed = parser.parse_args(args[1:])

    if (parsed.verbose):
        VERBOSE = True

    return (parsed.cppfile, parsed.buildcmd, parsed.execmd, parsed.start,
            parsed.end, parsed.repeat, parsed.template, parsed.csv)
+ """ + if (not condition): + raise SystemExit("FAIL: %s" % error_msg) + +############################################################################### +def run_cmd(cmd, ok_to_fail=False, input_str=None, from_dir=None, verbose=None, + arg_stdout=subprocess.PIPE, arg_stderr=subprocess.PIPE): +############################################################################### + verbose_print("RUN: %s" % cmd, verbose) + + if (input_str is not None): + stdin = subprocess.PIPE + else: + stdin = None + + proc = subprocess.Popen(cmd, + shell=True, + stdout=arg_stdout, + stderr=arg_stderr, + stdin=stdin, + cwd=from_dir) + output, errput = proc.communicate(input_str) + output = output.strip() if output is not None else output + stat = proc.wait() + + if (ok_to_fail): + return stat, output, errput + else: + if (arg_stderr is not None): + errput = errput if errput is not None else open(arg_stderr.name, "r").read() + expect(stat == 0, "Command: '%s' failed with error '%s'" % (cmd, errput)) + else: + expect(stat == 0, "Command: '%s' failed. 
###############################################################################
def build_and_run(source, cppfile, buildcmd, execmd, repeat):
###############################################################################
    """
    Write source (a list of lines) to cppfile, build it with buildcmd, then
    run execmd repeat times and return the largest "FOM: <number>" value
    printed by any of the runs.

    Exits via expect() if repeat is not positive or if a run prints no
    line matching the FOM pattern.
    """
    expect(repeat > 0, "Number of repeats must be positive, got %s" % repeat)

    # Close the file explicitly so the whole source is on disk before the
    # build command reads it.
    with open(cppfile, 'w') as fd:
        fd.writelines(source)

    run_cmd(buildcmd)

    # Loop-invariant: compile the pattern once rather than on every repeat.
    fom_regex = re.compile(r'^FOM: ([0-9.]+)$')

    best = None
    for _ in range(repeat):
        wait_for_quiet_machine()
        output = run_cmd(execmd)

        current = None
        for line in output.splitlines():
            m = fom_regex.match(line)
            if (m is not None):
                current = float(m.groups()[0])
                break

        expect(current is not None, "No lines in output matched FOM regex")

        if (best is None or best < current):
            best = current

    return best
###############################################################################
def report(num_dupes, curr_lines, object_file, orig_fom, curr_fom, csv=False, is_first_report=False):
###############################################################################
    """
    Print one result row for a run with num_dupes duplicated functions.

    Reports the size in bytes of object_file, the number of source lines in
    curr_lines, the measured curr_fom and its percentage change relative to
    orig_fom. With csv=True a single comma-separated row is printed,
    preceded by a header row when is_first_report is set; otherwise a
    labelled multi-line block is printed.
    """
    if (orig_fom != 0):
        fom_change = (curr_fom - orig_fom) / orig_fom
    elif (curr_fom == orig_fom):
        # A baseline of zero compared with itself: no change.
        fom_change = 0.0
    else:
        # A relative change against a zero baseline is undefined; report NaN
        # instead of dying with ZeroDivisionError halfway through a sweep.
        fom_change = float("nan")

    obj_size = os.path.getsize(object_file)
    pct_diff = fom_change*100

    if (csv):
        if (is_first_report):
            print("num_dupes, obj_byte_size, loc, fom, pct_diff")

        print("%s, %s, %s, %s, %s" % (num_dupes, obj_size, len(curr_lines), curr_fom, pct_diff))
    else:
        print("========================================================")
        print("For number of dupes: %s" % (num_dupes,))
        print("Object file size (bytes): %s" % (obj_size,))
        print("Lines of code: %s" % (len(curr_lines),))
        print("Field of merit: %s" % (curr_fom,))
        print("Change pct: %s" % (pct_diff,))
+ + os.remove(cppfile) + os.rename(backup_file, cppfile) + +############################################################################### +def _main_func(description): +############################################################################### + if ("--test" in sys.argv): + test_results = doctest.testmod(verbose=True) + sys.exit(1 if test_results.failed > 0 else 0) + + cppfile, buildcmd, execmd, start, end, repeat, template, csv = parse_command_line(sys.argv, description) + + obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv) + +############################################################################### +if (__name__ == "__main__"): + _main_func(__doc__) diff --git a/lib/kokkos/containers/CMakeLists.txt b/lib/kokkos/containers/CMakeLists.txt new file mode 100644 index 0000000000..894935fa01 --- /dev/null +++ b/lib/kokkos/containers/CMakeLists.txt @@ -0,0 +1,10 @@ + + +TRIBITS_SUBPACKAGE(Containers) + +ADD_SUBDIRECTORY(src) + +TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) +TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) + +TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/lib/kokkos/containers/cmake/Dependencies.cmake b/lib/kokkos/containers/cmake/Dependencies.cmake new file mode 100644 index 0000000000..1d71d8af34 --- /dev/null +++ b/lib/kokkos/containers/cmake/Dependencies.cmake @@ -0,0 +1,5 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_PACKAGES KokkosCore + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/lib/kokkos/containers/cmake/KokkosContainers_config.h.in b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in new file mode 100644 index 0000000000..d91fdda1e3 --- /dev/null +++ b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in @@ -0,0 +1,4 @@ +#ifndef KOKKOS_CONTAINERS_CONFIG_H +#define KOKKOS_CONTAINERS_CONFIG_H + +#endif diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt new file mode 100644 
index 0000000000..6b57802935 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -0,0 +1,26 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +SET(SOURCES + TestMain.cpp + TestCuda.cpp + ) + +IF(Kokkos_ENABLE_Pthread) + LIST( APPEND SOURCES TestThreads.cpp) +ENDIF() + +IF(Kokkos_ENABLE_OpenMP) + LIST( APPEND SOURCES TestOpenMP.cpp) +ENDIF() + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + PerformanceTest + SOURCES ${SOURCES} + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile new file mode 100644 index 0000000000..e7abaf44ce --- /dev/null +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -0,0 +1,81 @@ +KOKKOS_PATH = ../.. + +GTEST_PATH = ../../TPL/gtest + +vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests + +default: build_all + echo "End Build" + + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests + +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_OpenMP + TEST_TARGETS += test-openmp +endif + 
+KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda + +KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads + +KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP + +test-cuda: KokkosContainers_PerformanceTest_Cuda + ./KokkosContainers_PerformanceTest_Cuda + +test-threads: KokkosContainers_PerformanceTest_Threads + ./KokkosContainers_PerformanceTest_Threads + +test-openmp: KokkosContainers_PerformanceTest_OpenMP + ./KokkosContainers_PerformanceTest_OpenMP + + +build_all: $(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp new file mode 100644 index 0000000000..aee262de93 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -0,0 +1,100 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#if defined( KOKKOS_HAVE_CUDA ) + +#include + +#include + +#include + +namespace Performance { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + static void TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +TEST_F( cuda, global_2_local) +{ + std::cout << "Cuda" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( cuda, unordered_map_performance_near) +{ + Perf::run_performance_tests("cuda-near"); +} + +TEST_F( cuda, unordered_map_performance_far) +{ + Perf::run_performance_tests("cuda-far"); +} + +} + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp new file mode 100644 index 0000000000..fb70b8fe2e --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -0,0 +1,231 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP +#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP + +#include +#include +#include +#include + +#include + +// This test will simulate global ids + +namespace Performance { + +static const unsigned begin_id_size = 256u; +static const unsigned end_id_size = 1u << 22; +static const unsigned id_step = 2u; + +union helper +{ + uint32_t word; + uint8_t byte[4]; +}; + + +template +struct generate_ids +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View local_id_view; + + local_id_view local_2_global; + + generate_ids( local_id_view & ids) + : local_2_global(ids) + { + Kokkos::parallel_for(local_2_global.dimension_0(), *this); + } + + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + + helper x = {static_cast(i)}; + + // shuffle the bytes of i to create a unique, semi-random global_id + x.word = ~x.word; + + uint8_t tmp = x.byte[3]; + x.byte[3] = x.byte[1]; + x.byte[1] = tmp; + + tmp = x.byte[2]; + x.byte[2] = x.byte[0]; + x.byte[0] = tmp; + + local_2_global[i] = x.word; + } + +}; + +template +struct fill_map +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View local_id_view; + typedef Kokkos::UnorderedMap global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + fill_map( global_id_view gIds, local_id_view lIds) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_for(local_2_global.dimension_0(), *this); + } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + global_2_local.insert( local_2_global[i], i); + } + +}; + +template +struct find_test +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View local_id_view; + typedef 
Kokkos::UnorderedMap global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + typedef size_t value_type; + + find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors); + } + + KOKKOS_INLINE_FUNCTION + void init(value_type & v) const + { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type & dst, volatile value_type const & src) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i, value_type & num_errors) const + { + uint32_t index = global_2_local.find( local_2_global[i] ); + + if ( global_2_local.value_at(index) != i) ++num_errors; + } + +}; + +template +void test_global_to_local_ids(unsigned num_ids) +{ + + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + + typedef Kokkos::View local_id_view; + typedef Kokkos::UnorderedMap global_id_view; + + //size + std::cout << num_ids << ", "; + + double elasped_time = 0; + Kokkos::Impl::Timer timer; + + local_id_view local_2_global("local_ids", num_ids); + global_id_view global_2_local((3u*num_ids)/2u); + + //create + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + // generate unique ids + { + generate_ids gen(local_2_global); + } + Device::fence(); + // generate + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + { + fill_map fill(global_2_local, local_2_global); + } + Device::fence(); + + // fill + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + + size_t num_errors = 0; + for (int i=0; i<100; ++i) + { + find_test find(global_2_local, local_2_global,num_errors); + } + Device::fence(); + + // find + elasped_time = timer.seconds(); + std::cout << elasped_time << std::endl; + + ASSERT_EQ( num_errors, 0u); +} + + +} // namespace Performance + + 
+#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP + diff --git a/lib/kokkos/containers/performance_tests/TestMain.cpp b/lib/kokkos/containers/performance_tests/TestMain.cpp new file mode 100644 index 0000000000..f952ab3db5 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp new file mode 100644 index 0000000000..82a9311df7 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -0,0 +1,131 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include + +#include +#include + +#include +#include +#include +#include + + +namespace Performance { + +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core() + ; + + } + + std::cout << "OpenMP: " << num_threads << std::endl; + + Kokkos::OpenMP::initialize( num_threads ); + + std::cout << "available threads: " << omp_get_max_threads() << std::endl; + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + + omp_set_num_threads(1); + + ASSERT_EQ( 1 , omp_get_max_threads() ); + } +}; + +TEST_F( openmp, global_2_local) +{ + std::cout << "OpenMP" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( openmp, unordered_map_performance_near) +{ + unsigned num_openmp = 4; + if (Kokkos::hwloc::available()) { + num_openmp = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "openmp-" << num_openmp << "-near"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( openmp, unordered_map_performance_far) +{ + unsigned num_openmp = 4; + if (Kokkos::hwloc::available()) { + num_openmp = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream 
base_file_name; + base_file_name << "openmp-" << num_openmp << "-far"; + Perf::run_performance_tests(base_file_name.str()); +} + +} // namespace Performance + diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp new file mode 100644 index 0000000000..04d9dc0c18 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -0,0 +1,126 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include + +#include + +#include +#include + +#include +#include +#include +#include + +namespace Performance { + +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + + std::cout << "Threads: " << num_threads << std::endl; + + Kokkos::Threads::initialize( num_threads ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + +TEST_F( threads, global_2_local) +{ + std::cout << "Threads" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( threads, unordered_map_performance_near) +{ + unsigned num_threads = 4; + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + 
std::ostringstream base_file_name; + base_file_name << "threads-" << num_threads << "-near"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( threads, unordered_map_performance_far) +{ + unsigned num_threads = 4; + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "threads-" << num_threads << "-far"; + Perf::run_performance_tests(base_file_name.str()); +} + +} // namespace Performance + + diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp new file mode 100644 index 0000000000..975800229c --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -0,0 +1,262 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP +#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP + +#include + +#include +#include +#include +#include +#include + + +namespace Perf { + +template +struct UnorderedMapTest +{ + typedef Device execution_space; + typedef Kokkos::UnorderedMap map_type; + typedef typename map_type::histogram_type histogram_type; + + struct value_type { + uint32_t failed_count; + uint32_t max_list; + }; + + uint32_t capacity; + uint32_t inserts; + uint32_t collisions; + double seconds; + map_type map; + histogram_type histogram; + + UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions) + : capacity(arg_capacity) + , inserts(arg_inserts) + , collisions(arg_collisions) + , seconds(0) + , map(capacity) + , histogram(map.get_histogram()) + { + Kokkos::Impl::Timer wall_clock ; + wall_clock.reset(); + + value_type v = {}; + int loop_count = 0; + do { + ++loop_count; + + v = value_type(); + Kokkos::parallel_reduce(inserts, *this, v); + + if (v.failed_count > 0u) { + const uint32_t 
new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ; + map.rehash( new_capacity ); + } + } while (v.failed_count > 0u); + + seconds = wall_clock.seconds(); + + switch (loop_count) + { + case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break; + case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break; + default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break; + } + std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush; + + histogram.calculate(); + Device::fence(); + } + + void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out) + { + metrics_out << map.capacity() << " , "; + metrics_out << inserts/collisions << " , "; + metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , "; + metrics_out << inserts << " , "; + metrics_out << (map.failed_insert() ? "true" : "false") << " , "; + metrics_out << collisions << " , "; + metrics_out << 1e9*(seconds/inserts) << " , "; + metrics_out << seconds << std::endl; + + length_out << map.capacity() << " , "; + length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + length_out << collisions << " , "; + histogram.print_length(length_out); + + distance_out << map.capacity() << " , "; + distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + distance_out << collisions << " , "; + histogram.print_distance(distance_out); + + block_distance_out << map.capacity() << " , "; + block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + block_distance_out << collisions << " , "; + histogram.print_block_distance(block_distance_out); + } + + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const + { + v.failed_count = 0; + v.max_list = 0; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, const volatile value_type & src ) const + { + 
dst.failed_count += src.failed_count; + dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list; + } + + KOKKOS_INLINE_FUNCTION + void operator()(uint32_t i, value_type & v) const + { + const uint32_t key = Near ? i/collisions : i%(inserts/collisions); + typename map_type::insert_result result = map.insert(key,i); + v.failed_count += !result.failed() ? 0 : 1; + v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position(); + } + +}; + +//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS + +template +void run_performance_tests(std::string const & base_file_name) +{ +#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS) + std::string metrics_file_name = base_file_name + std::string("-metrics.csv"); + std::string length_file_name = base_file_name + std::string("-length.csv"); + std::string distance_file_name = base_file_name + std::string("-distance.csv"); + std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv"); + + std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out ); + std::ofstream length_out( length_file_name.c_str(), std::ofstream::out ); + std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out ); + std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out ); + + + /* + const double test_ratios[] = { + 0.50 + , 0.75 + , 0.80 + , 0.85 + , 0.90 + , 0.95 + , 1.00 + , 1.25 + , 2.00 + }; + */ + + const double test_ratios[] = { 1.00 }; + + const int num_ratios = sizeof(test_ratios) / sizeof(double); + + /* + const uint32_t collisions[] { + 1 + , 4 + , 16 + , 64 + }; + */ + + const uint32_t collisions[] = { 16 }; + + const int num_collisions = sizeof(collisions) / sizeof(uint32_t); + + // set up file headers; the histogram files need the same three leading columns (capacity, percent full, collision ratio) that print() writes on every data row + metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl; + length_out << "Capacity , Percent Full , Collision Ratio , "; + distance_out << "Capacity , 
Percent Full , Collision Ratio , "; + block_distance_out << "Capacity , Percent Full , Collision Ratio , "; + + for (int i=0; i<100; ++i) { + length_out << i << " , "; + distance_out << i << " , "; + block_distance_out << i << " , "; + } + + length_out << "\b\b\b " << std::endl; + distance_out << "\b\b\b " << std::endl; + block_distance_out << "\b\b\b " << std::endl; + + Kokkos::Impl::Timer wall_clock ; + for (int i=0; i < num_collisions ; ++i) { + wall_clock.reset(); + std::cout << "Collisions: " << collisions[i] << std::endl; + for (int j = 0; j < num_ratios; ++j) { + std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "% " << std::flush; + for (uint32_t capacity = 1<<14; capacity < 1<<25; capacity = capacity << 1) { + uint32_t inserts = static_cast(test_ratios[j]*(capacity)); + std::cout << capacity << std::flush; + UnorderedMapTest test(capacity, inserts*collisions[i], collisions[i]); + Device::fence(); + test.print(metrics_out, length_out, distance_out, block_distance_out); + } + std::cout << "\b\b " << std::endl; + + } + std::cout << " " << wall_clock.seconds() << " secs" << std::endl; + } + metrics_out.close(); + length_out.close(); + distance_out.close(); + block_distance_out.close(); +#else + (void)base_file_name; + std::cout << "skipping test" << std::endl; +#endif +} + + +} // namespace Perf + +#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP diff --git a/lib/kokkos/containers/src/CMakeLists.txt b/lib/kokkos/containers/src/CMakeLists.txt new file mode 100644 index 0000000000..da5a791530 --- /dev/null +++ b/lib/kokkos/containers/src/CMakeLists.txt @@ -0,0 +1,31 @@ + +TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +SET(HEADERS "") +SET(SOURCES "") + +SET(HEADERS_IMPL "") + +FILE(GLOB HEADERS *.hpp) +FILE(GLOB HEADERS_IMPL impl/*.hpp) +FILE(GLOB SOURCES impl/*.cpp) 
+ +SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + +INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) + +TRIBITS_ADD_LIBRARY( + kokkoscontainers + HEADERS ${HEADERS} + NOINSTALLHEADERS ${HEADERS_IMPL} + SOURCES ${SOURCES} + DEPLIBS + ) + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp new file mode 100644 index 0000000000..74da5f61b5 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -0,0 +1,437 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_BITSET_HPP +#define KOKKOS_BITSET_HPP + +#include +#include + +#include + +#include + +namespace Kokkos { + +template +class Bitset; + +template +class ConstBitset; + +template +void deep_copy( Bitset & dst, Bitset const& src); + +template +void deep_copy( Bitset & dst, ConstBitset const& src); + +template +void deep_copy( ConstBitset & dst, ConstBitset const& src); + + +/// A thread safe view to a bitset +template +class Bitset +{ +public: + typedef Device execution_space; + typedef unsigned size_type; + + enum { BIT_SCAN_REVERSE = 1u }; + enum { MOVE_HINT_BACKWARD = 2u }; + + enum { + BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u + , BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE + , BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD + , BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD + }; + +private: + enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; + enum { block_mask = block_size-1u }; + enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; + +public: + + + /// constructor + /// arg_size := number of bits in the set + Bitset(unsigned arg_size = 0u) + : m_size(arg_size) + , m_last_block_mask(0u) + , m_blocks("Bitset", ((m_size + block_mask) >> block_shift) ) + { + for (int i=0, end = 
static_cast(m_size & block_mask); i < end; ++i) { + m_last_block_mask |= 1u << i; + } + } + + /// assignment + Bitset & operator = (Bitset const & rhs) + { + this->m_size = rhs.m_size; + this->m_last_block_mask = rhs.m_last_block_mask; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + /// copy constructor + Bitset( Bitset const & rhs) + : m_size( rhs.m_size ) + , m_last_block_mask( rhs.m_last_block_mask ) + , m_blocks( rhs.m_blocks ) + {} + + /// number of bits in the set + /// can be called from the host or the device + KOKKOS_FORCEINLINE_FUNCTION + unsigned size() const + { return m_size; } + + /// number of bits which are set to 1 + /// can only be called from the host + unsigned count() const + { + Impl::BitsetCount< Bitset > f(*this); + return f.apply(); + } + + /// set all bits to 1 + /// can only be called from the host + void set() + { + Kokkos::deep_copy(m_blocks, ~0u ); + + if (m_last_block_mask) { + // clear the unused bits in the last block by overwriting it with m_last_block_mask + typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; + raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned)); + } + } + + /// set all bits to 0 + /// can only be called from the host + void reset() + { + Kokkos::deep_copy(m_blocks, 0u ); + } + + /// set all bits to 0 + /// can only be called from the host + void clear() + { + Kokkos::deep_copy(m_blocks, 0u ); + } + + /// atomically set i'th bit to 1; returns true only if i < size() and the bit was previously 0 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned i ) const + { + if ( i < m_size ) { + unsigned * block_ptr = &m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast( i & block_mask ); + + return !( atomic_fetch_or( block_ptr, mask ) & mask ); + } + return false; + } + + /// atomically set i'th bit to 0; returns true only if i < size() and the bit was previously 1 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool reset( unsigned i ) const + { + if ( i < m_size ) { + unsigned * block_ptr = &m_blocks[ i 
>> block_shift ]; + const unsigned mask = 1u << static_cast( i & block_mask ); + + return atomic_fetch_and( block_ptr, ~mask ) & mask; + } + return false; + } + + /// return true if the i'th bit set to 1 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool test( unsigned i ) const + { + if ( i < m_size ) { + const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]); + const unsigned mask = 1u << static_cast( i & block_mask ); + return block & mask; + } + return false; + } + + /// used with find_any_set_near or find_any_unset_near functions + /// returns the max number of times those functions should be call + /// when searching for an available bit + KOKKOS_FORCEINLINE_FUNCTION + unsigned max_hint() const + { + return m_blocks.dimension_0(); + } + + /// find a bit set to 1 near the hint + /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found + /// and if result.first is false the result.second is a new hint + KOKKOS_INLINE_FUNCTION + Kokkos::pair find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const + { + const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[ block_idx ]); + block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? 
block : block & m_last_block_mask ; + + return find_any_helper(block_idx, offset, block, scan_direction); + } + + /// find a bit set to 0 near the hint + /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found + /// and if result.first is false then result.second is a new hint; a hint past the last block restarts at block 0, as in find_any_set_near + KOKKOS_INLINE_FUNCTION + Kokkos::pair find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const + { + const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[ block_idx ]); + block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ; + + return find_any_helper(block_idx, offset, block, scan_direction); + } + +private: + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const + { + Kokkos::pair result( block > 0u, 0); + + if (!result.first) { + result.second = update_hint( block_idx, offset, scan_direction ); + } + else { + result.second = scan_block( (block_idx << block_shift) + , offset + , block + , scan_direction + ); + } + return result; + } + + + KOKKOS_FORCEINLINE_FUNCTION + unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const + { + offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask; + block = Impl::rotate_right(block, offset); + return ((( !(scan_direction & BIT_SCAN_REVERSE) ? + Impl::bit_scan_forward(block) : + Impl::bit_scan_reverse(block) + ) + offset + ) & block_mask + ) + block_start; + } + + KOKKOS_FORCEINLINE_FUNCTION + unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const + { + block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1; + block_idx = block_idx >= 0 ? 
block_idx : m_blocks.dimension_0() - 1; + block_idx = block_idx < static_cast(m_blocks.dimension_0()) ? block_idx : 0; + + return static_cast(block_idx)*block_size + offset; + } + +private: + + unsigned m_size; + unsigned m_last_block_mask; + View< unsigned *, execution_space, MemoryTraits > m_blocks; + +private: + template + friend class Bitset; + + template + friend class ConstBitset; + + template + friend struct Impl::BitsetCount; + + template + friend void deep_copy( Bitset & dst, Bitset const& src); + + template + friend void deep_copy( Bitset & dst, ConstBitset const& src); +}; + +/// a thread-safe view to a const bitset +/// i.e. can only test bits +template +class ConstBitset +{ +public: + typedef Device execution_space; + typedef unsigned size_type; + +private: + enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; + enum { block_mask = block_size -1u }; + enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; + +public: + ConstBitset() + : m_size (0) + {} + + ConstBitset(Bitset const& rhs) + : m_size(rhs.m_size) + , m_blocks(rhs.m_blocks) + {} + + ConstBitset(ConstBitset const& rhs) + : m_size( rhs.m_size ) + , m_blocks( rhs.m_blocks ) + {} + + ConstBitset & operator = (Bitset const & rhs) + { + this->m_size = rhs.m_size; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + ConstBitset & operator = (ConstBitset const & rhs) + { + this->m_size = rhs.m_size; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + + KOKKOS_FORCEINLINE_FUNCTION + unsigned size() const + { + return m_size; + } + + unsigned count() const + { + Impl::BitsetCount< ConstBitset > f(*this); + return f.apply(); + } + + KOKKOS_FORCEINLINE_FUNCTION + bool test( unsigned i ) const + { + if ( i < m_size ) { + const unsigned block = m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast( i & block_mask ); + return block & mask; + } + return false; + } + +private: + + unsigned m_size; + View< const unsigned *, execution_space, 
MemoryTraits > m_blocks; + +private: + template + friend class ConstBitset; + + template + friend struct Impl::BitsetCount; + + template + friend void deep_copy( Bitset & dst, ConstBitset const& src); + + template + friend void deep_copy( ConstBitset & dst, ConstBitset const& src); +}; + + +template +void deep_copy( Bitset & dst, Bitset const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +template +void deep_copy( Bitset & dst, ConstBitset const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +template +void deep_copy( ConstBitset & dst, ConstBitset const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +} // namespace Kokkos + +#endif //KOKKOS_BITSET_HPP diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp new file mode 100644 index 0000000000..1230df4d97 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -0,0 +1,982 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos 
v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_DualView.hpp +/// \brief Declaration and definition of Kokkos::DualView. 
+/// +/// This header file declares and defines Kokkos::DualView and its +/// related nonmember functions. + +#ifndef KOKKOS_DUALVIEW_HPP +#define KOKKOS_DUALVIEW_HPP + +#include +#include + +namespace Kokkos { + +/* \class DualView + * \brief Container to manage mirroring a Kokkos::View that lives + * in device memory with a Kokkos::View that lives in host memory. + * + * This class provides capabilities to manage data which exists in two + * memory spaces at the same time. It keeps views of the same layout + * on two memory spaces as well as modified flags for both + * allocations. Users are responsible for setting the modified flags + * manually if they change the data in either memory space, by calling + * the modify() method templated on the device where they modified the + * data. Users may synchronize data by calling the sync() function, + * templated on the device towards which they want to synchronize + * (i.e., the target of the one-way copy operation). + * + * The DualView class also provides convenience methods such as + * realloc, resize and capacity which call the appropriate methods of + * the underlying Kokkos::View objects. + * + * The four template arguments are the same as those of Kokkos::View. + * (Please refer to that class' documentation for a detailed + * description.) + * + * \tparam DataType The type of the entries stored in the container. + * + * \tparam Layout The array's layout in memory. + * + * \tparam Device The Kokkos Device type. If its memory space is + * not the same as the host's memory space, then DualView will + * contain two separate Views: one in device memory, and one in + * host memory. Otherwise, DualView will only store one View. + * + * \tparam MemoryTraits (optional) The user's intended memory access + * behavior. Please see the documentation of Kokkos::View for + * examples. The default suffices for most users. 
+ */ +template< class DataType , + class Arg1Type = void , + class Arg2Type = void , + class Arg3Type = void> +class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > +{ +public: + //! \name Typedefs for device types and various Kokkos::View specializations. + //@{ + typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; + + //! The Kokkos Host Device type; + typedef typename traits::host_mirror_space host_mirror_space ; + + //! The type of a Kokkos::View on the device. + typedef View< typename traits::data_type , + Arg1Type , + Arg2Type , + Arg3Type > t_dev ; + + /// \typedef t_host + /// \brief The type of a Kokkos::View host mirror of \c t_dev. + typedef typename t_dev::HostMirror t_host ; + + //! The type of a const View on the device. + //! The type of a Kokkos::View on the device. + typedef View< typename traits::const_data_type , + Arg1Type , + Arg2Type , + Arg3Type > t_dev_const ; + + /// \typedef t_host_const + /// \brief The type of a const View host mirror of \c t_dev_const. + typedef typename t_dev_const::HostMirror t_host_const; + + //! The type of a const, random-access View on the device. + typedef View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + Kokkos::MemoryTraits > t_dev_const_randomread ; + + /// \typedef t_host_const_randomread + /// \brief The type of a const, random-access View host mirror of + /// \c t_dev_const_randomread. + typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread; + + //! The type of an unmanaged View on the device. + typedef View< typename traits::data_type , + typename traits::array_layout , + typename traits::device_type , + MemoryUnmanaged> t_dev_um; + + //! The type of an unmanaged View host mirror of \c t_dev_um. + typedef View< typename t_host::data_type , + typename t_host::array_layout , + typename t_host::device_type , + MemoryUnmanaged> t_host_um; + + //! 
The type of a const unmanaged View on the device. + typedef View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + MemoryUnmanaged> t_dev_const_um; + + //! The type of a const unmanaged View host mirror of \c t_dev_const_um. + typedef View t_host_const_um; + + //! The type of a const, random-access View on the device. NOTE(review): built from t_host traits, unlike t_dev_const_um -- confirm intent. + typedef View< typename t_host::const_data_type , + typename t_host::array_layout , + typename t_host::device_type , + Kokkos::MemoryTraits > t_dev_const_randomread_um ; + + /// \typedef t_host_const_randomread_um + /// \brief The type of a const, random-access View host mirror of + /// \c t_dev_const_randomread. NOTE(review): not derived from t_dev_const_randomread_um -- confirm intent. + typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um; + + //@} + //! \name The two View instances. + //@{ + + t_dev d_view; + t_host h_view; + + //@} + //! \name Counters to keep track of changes ("modified" flags) + //@{ + + View modified_device; + View modified_host; + + //@} + //! \name Constructors + //@{ + + /// \brief Empty constructor. + /// + /// Both device and host View objects are constructed using their + /// default constructors. The "modified" flags are both initialized + /// to "unmodified." + DualView () : + modified_device (View ("DualView::modified_device")), + modified_host (View ("DualView::modified_host")) + {} + + /// \brief Constructor that allocates View objects on both host and device. + /// + /// This constructor works like the analogous constructor of View. + /// The first argument is a string label, which is entirely for your + /// benefit. (Different DualView objects may have the same label if + /// you like.) The arguments that follow are the dimensions of the + /// View objects. For example, if the View has three dimensions, + /// the first three integer arguments will be nonzero, and you may + /// omit the integer arguments that follow. 
+ DualView (const std::string& label, + const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0, + const size_t n7 = 0) + : d_view (label, n0, n1, n2, n3, n4, n5, n6, n7) + , h_view (create_mirror_view (d_view)) // without UVM, host View mirrors + , modified_device (View ("DualView::modified_device")) + , modified_host (View ("DualView::modified_host")) + {} + + //! Copy constructor (shallow copy) + template + DualView (const DualView& src) : + d_view (src.d_view), + h_view (src.h_view), + modified_device (src.modified_device), + modified_host (src.modified_host) + {} + + //! Subview constructor + template< class SD, class S1 , class S2 , class S3 + , class Arg0 , class ... Args > + DualView( const DualView & src + , const Arg0 & arg0 + , Args ... args + ) + : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) ) + , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) ) + , modified_device (src.modified_device) + , modified_host (src.modified_host) + {} + + /// \brief Create DualView from existing device and host View objects. + /// + /// This constructor assumes that the device and host View objects + /// are synchronized. You, the caller, are responsible for making + /// sure this is the case before calling this constructor. After + /// this constructor returns, you may use DualView's sync() and + /// modify() methods to ensure synchronization of the View objects. + /// + /// \param d_view_ Device View + /// \param h_view_ Host View (must have type t_host = t_dev::HostMirror) + DualView (const t_dev& d_view_, const t_host& h_view_) : + d_view (d_view_), + h_view (h_view_), + modified_device (View ("DualView::modified_device")), + modified_host (View ("DualView::modified_host")) + { +#if ! 
KOKKOS_USING_EXP_VIEW + Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ()); +#else + if ( int(d_view.rank) != int(h_view.rank) || + d_view.dimension_0() != h_view.dimension_0() || + d_view.dimension_1() != h_view.dimension_1() || + d_view.dimension_2() != h_view.dimension_2() || + d_view.dimension_3() != h_view.dimension_3() || + d_view.dimension_4() != h_view.dimension_4() || + d_view.dimension_5() != h_view.dimension_5() || + d_view.dimension_6() != h_view.dimension_6() || + d_view.dimension_7() != h_view.dimension_7() || + d_view.stride_0() != h_view.stride_0() || + d_view.stride_1() != h_view.stride_1() || + d_view.stride_2() != h_view.stride_2() || + d_view.stride_3() != h_view.stride_3() || + d_view.stride_4() != h_view.stride_4() || + d_view.stride_5() != h_view.stride_5() || + d_view.stride_6() != h_view.stride_6() || + d_view.stride_7() != h_view.stride_7() || + d_view.span() != h_view.span() ) { + Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views"); + } +#endif + } + + //@} + //! \name Methods for synchronizing, marking as modified, and getting Views. + //@{ + + /// \brief Return a View on a specific device \c Device. + /// + /// Please don't be afraid of the if_c expression in the return + /// value's type. That just tells the method what the return type + /// should be: t_dev if the \c Device template parameter matches + /// this DualView's device type, else t_host. 
+ /// + /// For example, suppose you create a DualView on Cuda, like this: + /// \code + /// typedef Kokkos::DualView dual_view_type; + /// dual_view_type DV ("my dual view", 100); + /// \endcode + /// If you want to get the CUDA device View, do this: + /// \code + /// typename dual_view_type::t_dev cudaView = DV.view (); + /// \endcode + /// and if you want to get the host mirror of that View, do this: + /// \code + /// typedef typename Kokkos::HostSpace::execution_space host_device_type; + /// typename dual_view_type::t_host hostView = DV.view (); + /// \endcode + template< class Device > + KOKKOS_INLINE_FUNCTION + const typename Impl::if_c< + Impl::is_same::value, + t_dev, + t_host>::type& view () const + { + return Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + t_dev, + t_host >::select (d_view , h_view); + } + + /// \brief Update data on device or host only if data in the other + /// space has been marked as modified. + /// + /// If \c Device is the same as this DualView's device type, then + /// copy data from host to device. Otherwise, copy data from device + /// to host. In either case, only copy if the source of the copy + /// has been modified. + /// + /// This is a one-way synchronization only. If the target of the + /// copy has been modified, this operation will discard those + /// modifications. It will also reset both device and host modified + /// flags. + /// + /// \note This method doesn't know on its own whether you modified + /// the data in either View. You must manually mark modified data + /// as modified, by calling the modify() method with the + /// appropriate template parameter. 
+ template + void sync( const typename Impl::enable_if< + ( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) || + ( Impl::is_same< Device , int>::value) + , int >::type& = 0) + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value , + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + deep_copy (d_view, h_view); + modified_host() = modified_device() = 0; + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + deep_copy (h_view, d_view); + modified_host() = modified_device() = 0; + } + } + if(Impl::is_same::value) { + t_dev::execution_space::fence(); + t_host::execution_space::fence(); + } + } + + template + void sync ( const typename Impl::enable_if< + ( ! 
Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) || + ( Impl::is_same< Device , int>::value) + , int >::type& = 0 ) + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + unsigned int, + unsigned int>::select (1, 0); + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + } + } + } + + template + bool need_sync() const + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value , + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + return true; + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + return true; + } + } + return false; + } + /// \brief Mark data as modified on the given device \c Device. + /// + /// If \c Device is the same as this DualView's device type, then + /// mark the device's data as modified. Otherwise, mark the host's + /// data as modified. + template + void modify () { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + // Increment the device's modified count. 
+ modified_device () = (modified_device () > modified_host () ? + modified_device () : modified_host ()) + 1; + } else { // hopefully Device is the same as DualView's host type + // Increment the host's modified count. + modified_host () = (modified_device () > modified_host () ? + modified_device () : modified_host ()) + 1; + } + } + + //@} + //! \name Methods for reallocating or resizing the View objects. + //@{ + + /// \brief Reallocate both View objects. + /// + /// This discards any existing contents of the objects, and resets + /// their modified flags. It does not copy the old contents + /// of either View into the new View objects. + void realloc( const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) { + ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + h_view = create_mirror_view( d_view ); + + /* Reset dirty flags */ + modified_device() = modified_host() = 0; + } + + /// \brief Resize both views, copying old contents into new if necessary. + /// + /// This method only copies the old contents into the new View + /// objects for the device which was last marked as modified. 
+ void resize( const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) { + if(modified_device() >= modified_host()) { + /* Resize on Device */ + ::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + h_view = create_mirror_view( d_view ); + + /* Mark Device copy as modified */ + modified_device() = modified_device()+1; + + } else { + /* Realloc on Device */ + + ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + t_host temp_view = create_mirror_view( d_view ); + + /* Remap on Host */ + Kokkos::deep_copy( temp_view , h_view ); + + h_view = temp_view; + + /* Mark Host copy as modified */ + modified_host() = modified_host()+1; + } + } + + //@} + //! \name Methods for getting capacity, stride, or dimension(s). + //@{ + + //! The allocation size (same as Kokkos::View::capacity). + size_t capacity() const { +#if KOKKOS_USING_EXP_VIEW + return d_view.span(); +#else + return d_view.capacity(); +#endif + } + + //! Get stride(s) for each dimension. 
+ template< typename iType> + void stride(iType* stride_) const { + d_view.stride(stride_); + } + + /* \brief return size of dimension 0 */ + size_t dimension_0() const {return d_view.dimension_0();} + /* \brief return size of dimension 1 */ + size_t dimension_1() const {return d_view.dimension_1();} + /* \brief return size of dimension 2 */ + size_t dimension_2() const {return d_view.dimension_2();} + /* \brief return size of dimension 3 */ + size_t dimension_3() const {return d_view.dimension_3();} + /* \brief return size of dimension 4 */ + size_t dimension_4() const {return d_view.dimension_4();} + /* \brief return size of dimension 5 */ + size_t dimension_5() const {return d_view.dimension_5();} + /* \brief return size of dimension 6 */ + size_t dimension_6() const {return d_view.dimension_6();} + /* \brief return size of dimension 7 */ + size_t dimension_7() const {return d_view.dimension_7();} + + //@} +}; + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// +// Partial specializations of Kokkos::subview() for DualView objects. +// + +#if KOKKOS_USING_EXP_VIEW + +namespace Kokkos { +namespace Impl { + +template< class D, class A1, class A2, class A3, class ... Args > +struct DualViewSubview { + + typedef typename Kokkos::Experimental::Impl::ViewMapping + < void + , Kokkos::ViewTraits< D, A1, A2, A3 > + , Args ... + >::traits_type dst_traits ; + + typedef Kokkos::DualView + < typename dst_traits::data_type + , typename dst_traits::array_layout + , typename dst_traits::device_type + , typename dst_traits::memory_traits + > type ; +}; + +} /* namespace Impl */ + + +template< class D , class A1 , class A2 , class A3 , class ... Args > +typename Impl::DualViewSubview::type +subview( const DualView & src , Args ... args ) +{ + return typename + Impl::DualViewSubview::type( src , args ... 
); +} + +} /* namespace Kokkos */ + +#else + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// +// Partial specializations of Kokkos::subview() for DualView objects. +// + +namespace Kokkos { +namespace Impl { + +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type + > +struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type > +{ +private: + + typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ; + + enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 }; + enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 }; + enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 }; + enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 }; + enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 }; + enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 }; + enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 }; + enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 }; + + // The source view rank must be equal to the input argument rank + // Once a void argument is encountered all subsequent arguments must be void. + enum { InputRank = + Impl::StaticAssert<( SrcViewType::rank == + ( V0 ? 0 : ( + V1 ? 1 : ( + V2 ? 2 : ( + V3 ? 3 : ( + V4 ? 4 : ( + V5 ? 5 : ( + V6 ? 6 : ( + V7 ? 7 : 8 ))))))) )) + && + ( SrcViewType::rank == + ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) ) + >::value ? SrcViewType::rank : 0 }; + + enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 
1 : 0 }; + enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 }; + enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 }; + enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 }; + enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 }; + enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 }; + enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 }; + enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 }; + + enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; + + // Reverse + enum { R0_rev = 0 == InputRank ? 0u : ( + 1 == InputRank ? unsigned(R0) : ( + 2 == InputRank ? unsigned(R1) : ( + 3 == InputRank ? unsigned(R2) : ( + 4 == InputRank ? unsigned(R3) : ( + 5 == InputRank ? unsigned(R4) : ( + 6 == InputRank ? unsigned(R5) : ( + 7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) }; + + typedef typename SrcViewType::array_layout SrcViewLayout ; + + // Choose array layout, attempting to preserve original layout if at all possible. + typedef typename Impl::if_c< + ( // Same Layout IF + // OutputRank 0 + ( OutputRank == 0 ) + || + // OutputRank 1 or 2, InputLayout Left, Interval 0 + // because single stride one or second index has a stride. + ( OutputRank <= 2 && R0 && Impl::is_same::value ) + || + // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] + // because single stride one or second index has a stride. + ( OutputRank <= 2 && R0_rev && Impl::is_same::value ) + ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ; + + // Choose data type as a purely dynamic rank array to accomodate a runtime range. 
+ typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type , + typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *, + typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **, + typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***, + typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****, + typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****, + typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******, + typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******, + typename SrcViewType::value_type ******** + >::type >::type >::type >::type >::type >::type >::type >::type OutputData ; + + // Choose space. + // If the source view's template arg1 or arg2 is a space then use it, + // otherwise use the source view's execution space. + + typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type , + typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space + >::type >::type OutputSpace ; + +public: + + // If keeping the layout then match non-data type arguments + // else keep execution space and memory traits. 
+ typedef typename + Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value + , Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type > + , Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace + , typename SrcViewType::memory_traits > + >::type type ; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +namespace Kokkos { + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 > +typename Impl::ViewSubview< DualView + , ArgType0 , void , void , void + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , void , void , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0); + sub_view.h_view = subview(src.h_view,arg0); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , void , void + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , void , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1); + sub_view.h_view = subview(src.h_view,arg0,arg1); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , void + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const 
ArgType2 & arg2 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , void , void ,void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4); + 
sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 , class ArgType6 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 , + const ArgType6 & arg6 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = 
src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , ArgType7 + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 , + const ArgType6 & arg6 , + const ArgType7 & arg7 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , ArgType7 + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +} // namespace Kokkos + +#endif /* KOKKOS_USING_EXP_VIEW */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// +// Partial specialization of Kokkos::deep_copy() for DualView objects. 
+// + +template< class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > +void +deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference + const DualView<ST,SL,SD,SM>& src ) +{ + if (src.modified_device () >= src.modified_host ()) { // src's device counter is not behind its host counter: copy the device views, flag dst as modified on device + deep_copy (dst.d_view, src.d_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> (); + } else { // otherwise copy the host views and flag dst as modified on host + deep_copy (dst.h_view, src.h_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> (); + } +} + +template< class ExecutionSpace , + class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > +void +deep_copy (const ExecutionSpace& exec , + DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference + const DualView<ST,SL,SD,SM>& src ) +{ + if (src.modified_device () >= src.modified_host ()) { // same selection as the overload without exec; exec is forwarded to the View deep_copy + deep_copy (exec, dst.d_view, src.d_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> (); + } else { + deep_copy (exec, dst.h_view, src.h_view); + dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> (); + } +} + +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp new file mode 100644 index 0000000000..0fc722c140 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -0,0 +1,1075 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_DynRankView.hpp +/// \brief Declaration and definition of Kokkos::Experimental::DynRankView. +/// +/// This header file declares and defines Kokkos::Experimental::DynRankView and its +/// related nonmember functions. +/* + * Changes from View + * 1. The rank of the DynRankView is returned by the method rank() + * 2. Max rank of a DynRankView is 7 + * 3. subview name is subdynrankview + * 4. 
Every subdynrankview is returned with LayoutStride + */ + +#ifndef KOKKOS_DYNRANKVIEW_HPP +#define KOKKOS_DYNRANKVIEW_HPP + +#include +#include +#include + +namespace Kokkos { +namespace Experimental { + +namespace Impl { + +template +struct DynRankDimTraits { + + // Compute the rank of the view from the nonzero dimension arguments. + KOKKOS_INLINE_FUNCTION + static size_t computeRank( const size_t N0 + , const size_t N1 + , const size_t N2 + , const size_t N3 + , const size_t N4 + , const size_t N5 + , const size_t N6 + , const size_t N7 ) + { + return + ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0 && N2 == 0 && N1 == 0 && N0 == 0) ? 0 + : ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0 && N2 == 0 && N1 == 0) ? 1 + : ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0 && N2 == 0) ? 2 + : ( (N6 == 0 && N5 == 0 && N4 == 0 && N3 == 0) ? 3 + : ( (N6 == 0 && N5 == 0 && N4 == 0) ? 4 + : ( (N6 == 0 && N5 == 0) ? 5 + : ( (N6 == 0) ? 6 + : 7 ) ) ) ) ) ) ); + } + + // Compute the rank of the view from the nonzero layout arguments. + template + KOKKOS_INLINE_FUNCTION + static size_t computeRank( const Layout& layout ) + { + return computeRank( layout.dimension[0] + , layout.dimension[1] + , layout.dimension[2] + , layout.dimension[3] + , layout.dimension[4] + , layout.dimension[5] + , layout.dimension[6] + , layout.dimension[7] ); + } + + // Create the layout for the rank-7 view. + // Non-strided Layout + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value || std::is_same::value) , Layout >::type createLayout( const Layout& layout ) + { + return Layout( layout.dimension[0] != 0 ? layout.dimension[0] : 1 + , layout.dimension[1] != 0 ? layout.dimension[1] : 1 + , layout.dimension[2] != 0 ? layout.dimension[2] : 1 + , layout.dimension[3] != 0 ? layout.dimension[3] : 1 + , layout.dimension[4] != 0 ? layout.dimension[4] : 1 + , layout.dimension[5] != 0 ? layout.dimension[5] : 1 + , layout.dimension[6] != 0 ? 
layout.dimension[6] : 1 + , layout.dimension[7] != 0 ? layout.dimension[7] : 1 + ); + } + + // LayoutStride + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value) , Layout>::type createLayout( const Layout& layout ) + { + return Layout( layout.dimension[0] != 0 ? layout.dimension[0] : 1 + , layout.stride[0] + , layout.dimension[1] != 0 ? layout.dimension[1] : 1 + , layout.stride[1] + , layout.dimension[2] != 0 ? layout.dimension[2] : 1 + , layout.stride[2] + , layout.dimension[3] != 0 ? layout.dimension[3] : 1 + , layout.stride[3] + , layout.dimension[4] != 0 ? layout.dimension[4] : 1 + , layout.stride[4] + , layout.dimension[5] != 0 ? layout.dimension[5] : 1 + , layout.stride[5] + , layout.dimension[6] != 0 ? layout.dimension[6] : 1 + , layout.stride[6] + , layout.dimension[7] != 0 ? layout.dimension[7] : 1 + , layout.stride[7] + ); + } + + // Create a view from the given dimension arguments. + // This is only necessary because the shmem constructor doesn't take a layout. + template + static ViewType createView( const ViewArg& arg + , const size_t N0 + , const size_t N1 + , const size_t N2 + , const size_t N3 + , const size_t N4 + , const size_t N5 + , const size_t N6 + , const size_t N7 ) + { + return ViewType( arg + , N0 != 0 ? N0 : 1 + , N1 != 0 ? N1 : 1 + , N2 != 0 ? N2 : 1 + , N3 != 0 ? N3 : 1 + , N4 != 0 ? N4 : 1 + , N5 != 0 ? N5 : 1 + , N6 != 0 ? N6 : 1 + , N7 != 0 ? N7 : 1 ); + } +}; + +} //end Impl + +/* \class DynRankView + * \brief Container that creates a Kokkos view with rank determined at runtime. + * Essentially this is a rank 7 view that wraps the access operators + * to yield the functionality of a view + * + * Changes from View + * 1. The rank of the DynRankView is returned by the method rank() + * 2. Max rank of a DynRankView is 7 + * 3. subview name is subdynrankview + * 4. Every subdynrankview is returned with LayoutStride + * + */ + +template< typename DataType , class ... 
Properties > +class DynRankView : private View< DataType*******, Properties... > +{ + static_assert( !std::is_array<DataType>::value && !std::is_pointer<DataType>::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); + +public: + using view_type = View< DataType******* , Properties...>; + using reference_type = typename view_type::reference_type; + +private: + template < class , class ... > friend class DynRankView ; + template< class , class ... > friend class Impl::ViewMapping ; + unsigned m_rank; + +public: + KOKKOS_INLINE_FUNCTION + view_type & DownCast() const { return const_cast< view_type & >( static_cast< const view_type & > (*this) ); } // static_cast alone cannot drop const from *this; callers must not modify a DynRankView that was declared const + KOKKOS_INLINE_FUNCTION + const view_type & ConstDownCast() const { return static_cast< const view_type & > (*this); } + + typedef ViewTraits< DataType , Properties ... > traits ; + + // Data type traits: + typedef typename traits::data_type data_type; + typedef typename traits::const_data_type const_data_type; + typedef typename traits::non_const_data_type non_const_data_type; + + // Compatible array of trivial type traits: + typedef typename traits::scalar_array_type scalar_array_type ; + typedef typename traits::const_scalar_array_type const_scalar_array_type ; + typedef typename traits::non_const_scalar_array_type non_const_scalar_array_type ; + + // Value type traits: + typedef typename traits::value_type value_type ; + typedef typename traits::const_value_type const_value_type ; + typedef typename traits::non_const_value_type non_const_value_type ; + + // Mapping traits: + typedef typename traits::array_layout array_layout ; + typedef typename traits::specialize specialize ; + + // Execution space, memory space, memory access traits, and host mirror space: + typedef typename traits::execution_space execution_space ; + typedef typename traits::memory_space memory_space ; + typedef typename traits::device_type device_type ; + typedef typename traits::memory_traits memory_traits ; + typedef typename traits::host_mirror_space 
host_mirror_space ; + + typedef typename traits::size_type size_type ; + + using view_type::is_hostspace ; + using view_type::is_managed ; + using view_type::is_random_access ; + + /** \brief Compatible view of array of scalar types */ + typedef DynRankView< typename traits::scalar_array_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + array_type ; + + /** \brief Compatible view of const data type */ + typedef DynRankView< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + const_type ; + + /** \brief Compatible view of non-const data type */ + typedef DynRankView< typename traits::non_const_data_type , + typename traits::array_layout , + typename traits::device_type , + typename traits::memory_traits > + non_const_type ; + + /** \brief Compatible HostMirror view */ + typedef DynRankView< typename traits::non_const_data_type , + typename traits::array_layout , + typename traits::host_mirror_space > + HostMirror ; + + //---------------------------------------- + // Domain rank and extents + + KOKKOS_INLINE_FUNCTION + DynRankView() : view_type() , m_rank(0) {} + + KOKKOS_INLINE_FUNCTION + constexpr unsigned rank() const { return m_rank; } + + using view_type::extent; + using view_type::extent_int; + using view_type::layout; + using view_type::dimension; + using view_type::size; + using view_type::stride; + + using pointer_type = typename view_type::pointer_type; + using view_type::reference_type_is_lvalue_reference; + using view_type::span; + using view_type::capacity; + using view_type::span_is_contiguous; + using view_type::data; + using view_type::implementation_map; + + using view_type::is_contiguous; + using view_type::ptr_on_device; + + //Deprecated, remove soon (add for test) + using view_type::dimension_0; + using view_type::dimension_1; + using view_type::dimension_2; + using view_type::dimension_3; + using 
view_type::dimension_4; + using view_type::dimension_5; + using view_type::dimension_6; + using view_type::dimension_7; + using view_type::stride_0; + using view_type::stride_1; + using view_type::stride_2; + using view_type::stride_3; + using view_type::stride_4; + using view_type::stride_5; + using view_type::stride_6; + using view_type::stride_7; + + //operators () + // Rank 0 + KOKKOS_INLINE_FUNCTION + reference_type operator()() const + { return view_type::operator()(0,0,0,0,0,0,0); } + + // Rank 1 + // This assumes a contiguous underlying memory (i.e. no padding, no striding...) + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< std::is_same::value && std::is_integral::value, reference_type>::type + operator[](const iType & i0) const + { + return data()[i0]; + } + + // This assumes a contiguous underlying memory (i.e. no padding, no striding... + // AND a Trilinos/Sacado scalar type ) + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !std::is_same::value && std::is_integral::value, reference_type>::type + operator[](const iType & i0) const + { + auto map = implementation_map(); + + const size_t dim_scalar = map.dimension_scalar(); + const size_t bytes = this->span() / dim_scalar; + + typedef Kokkos::View > tmp_view_type; + tmp_view_type rankone_view(this->data(), bytes, dim_scalar); + return rankone_view(i0); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + reference_type operator()(const iType & i0 ) const + { return view_type::operator()(i0,0,0,0,0,0,0); } + + // Rank 2 + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + reference_type operator()(const iType0 & i0 , const iType1 & i1 ) const + { return view_type::operator()(i0,i1,0,0,0,0,0); } + + // Rank 3 + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { return 
view_type::operator()(i0,i1,i2,0,0,0,0); } + + // Rank 4 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { return view_type::operator()(i0,i1,i2,i3,0,0,0); } + + // Rank 5 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { return view_type::operator()(i0,i1,i2,i3,i4,0,0); } + + // Rank 6 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { return view_type::operator()(i0,i1,i2,i3,i4,i5,0); } + + // Rank 7 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > + KOKKOS_INLINE_FUNCTION + reference_type operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const + { return view_type::operator()(i0,i1,i2,i3,i4,i5,i6); } + + //---------------------------------------- + // Standard constructor, destructor, and assignment operators... 
+ + KOKKOS_INLINE_FUNCTION + ~DynRankView() {} + + KOKKOS_INLINE_FUNCTION + DynRankView( const DynRankView & ) = default ; + + KOKKOS_INLINE_FUNCTION + DynRankView( DynRankView && ) = default ; + + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( const DynRankView & ) = default ; + + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( DynRankView && ) = default ; + + //---------------------------------------- + // Compatible view copy constructor and assignment + // may assign unmanaged from managed. + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView( const DynRankView & rhs ) + : view_type( rhs.ConstDownCast() ) + , m_rank(rhs.m_rank) + {} + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView & operator = (const DynRankView & rhs ) + { + view_type::operator = ( rhs.ConstDownCast() ); + m_rank = rhs.rank(); + return *this; + } + + //---------------------------------------- + // Allocation tracking properties + + using view_type::use_count; + using view_type::label; + + //---------------------------------------- + // Allocation according to allocation properties and array layout + + template< class ... P > + explicit inline + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : view_type( arg_prop + , Impl::DynRankDimTraits::createLayout(arg_layout) ) + , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) ) + {} + +//Wrappers + template< class ... P > + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... 
>::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : view_type( arg_prop + , Impl::DynRankDimTraits::createLayout(arg_layout) ) + , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) ) + {} + + //---------------------------------------- + //Constructor(s) + + // Simple dimension-only layout + template< class ... P > + explicit inline + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + , size_t + >::type const arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + : DynRankView( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + template< class ... P > + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + , size_t + >::type const arg_N0 = 0 + , const size_t arg_N1 = 0 + , const size_t arg_N2 = 0 + , const size_t arg_N3 = 0 + , const size_t arg_N4 = 0 + , const size_t arg_N5 = 0 + , const size_t arg_N6 = 0 + , const size_t arg_N7 = 0 + ) + : DynRankView( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + // Allocate with label and layout + template< typename Label > + explicit inline + DynRankView( const Label & arg_label + , typename std::enable_if< + Kokkos::Experimental::Impl::is_view_label