Updating kokkos lib
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,123 +0,0 @@
|
|||||||
|
|
||||||
#
|
|
||||||
# A) Forward declare the package so that certain options are also defined for
|
|
||||||
# subpackages
|
|
||||||
#
|
|
||||||
|
|
||||||
TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
|
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# B) Define the common options for Kokkos first so they can be used by
|
|
||||||
# subpackages as well.
|
|
||||||
#
|
|
||||||
|
|
||||||
TRIBITS_ADD_DEBUG_OPTION()
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_SIERRA_BUILD
|
|
||||||
KOKKOS_FOR_SIERRA
|
|
||||||
"Configure Kokkos for building within the Sierra build system."
|
|
||||||
OFF
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_Cuda
|
|
||||||
KOKKOS_HAVE_CUDA
|
|
||||||
"Enable CUDA support in Kokkos."
|
|
||||||
"${TPL_ENABLE_CUDA}"
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_Cuda_UVM
|
|
||||||
KOKKOS_USE_CUDA_UVM
|
|
||||||
"Enable CUDA Unified Virtual Memory support in Kokkos."
|
|
||||||
OFF
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_Pthread
|
|
||||||
KOKKOS_HAVE_PTHREAD
|
|
||||||
"Enable Pthread support in Kokkos."
|
|
||||||
"${TPL_ENABLE_Pthread}"
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_OpenMP
|
|
||||||
KOKKOS_HAVE_OPENMP
|
|
||||||
"Enable OpenMP support in Kokkos."
|
|
||||||
"${${PROJECT_NAME}_ENABLE_OpenMP}"
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_QTHREAD
|
|
||||||
KOKKOS_HAVE_QTHREAD
|
|
||||||
"Enable QTHREAD support in Kokkos."
|
|
||||||
"${TPL_ENABLE_QTHREAD}"
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_CXX11
|
|
||||||
KOKKOS_HAVE_CXX11
|
|
||||||
"Enable C++11 support in Kokkos."
|
|
||||||
"${${PROJECT_NAME}_ENABLE_CXX11}"
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_HWLOC
|
|
||||||
KOKKOS_HAVE_HWLOC
|
|
||||||
"Enable HWLOC support in Kokkos."
|
|
||||||
"${TPL_ENABLE_HWLOC}"
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_MPI
|
|
||||||
KOKKOS_HAVE_MPI
|
|
||||||
"Enable MPI support in Kokkos."
|
|
||||||
"${TPL_ENABLE_MPI}"
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_Debug_Bounds_Check
|
|
||||||
KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
|
|
||||||
"Enable bounds checking support in Kokkos."
|
|
||||||
OFF
|
|
||||||
)
|
|
||||||
|
|
||||||
#TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
# Kokkos_ENABLE_Profiling_Collect_Kernel_Data
|
|
||||||
# KOKKOS_ENABLE_PROFILING_COLLECT_KERNEL_DATA
|
|
||||||
# "Enable profiling support for kernel data collections in Kokkos."
|
|
||||||
# "${${PROJECT_NAME}_ENABLE_KokkosProfiler}"
|
|
||||||
# )
|
|
||||||
|
|
||||||
# placeholder for future device...
|
|
||||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
|
||||||
Kokkos_ENABLE_Winthread
|
|
||||||
KOKKOS_HAVE_WINTHREAD
|
|
||||||
"Enable Winthread support in Kokkos."
|
|
||||||
"${TPL_ENABLE_Winthread}"
|
|
||||||
)
|
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# C) Process the subpackages for Kokkos
|
|
||||||
#
|
|
||||||
|
|
||||||
TRIBITS_PROCESS_SUBPACKAGES()
|
|
||||||
|
|
||||||
#
|
|
||||||
# D) If Kokkos itself is enabled, process the Kokkos package
|
|
||||||
#
|
|
||||||
|
|
||||||
TRIBITS_PACKAGE_DEF()
|
|
||||||
|
|
||||||
TRIBITS_EXCLUDE_AUTOTOOLS_FILES()
|
|
||||||
|
|
||||||
TRIBITS_EXCLUDE_FILES(
|
|
||||||
classic/doc
|
|
||||||
classic/LinAlg/doc/CrsRefactorNotesMay2012
|
|
||||||
)
|
|
||||||
|
|
||||||
TRIBITS_PACKAGE_POSTPROCESS()
|
|
||||||
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
@ -1,73 +0,0 @@
|
|||||||
|
|
||||||
Developers of Kokkos (those who commit modifications to Kokkos)
|
|
||||||
must maintain the snapshot of Kokkos in the Trilinos repository.
|
|
||||||
|
|
||||||
This file contains instructions for how to
|
|
||||||
snapshot Kokkos from github.com/kokkos to Trilinos.
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
*** EVERYTHING GOES RIGHT WORKFLOW ***
|
|
||||||
|
|
||||||
1) Given a 'git clone' of Kokkos and of Trilinos repositories.
|
|
||||||
1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone.
|
|
||||||
This path *must* terminate with the directory name 'kokkos';
|
|
||||||
e.g., ${HOME}/kokkos .
|
|
||||||
1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory.
|
|
||||||
|
|
||||||
2) Given that the Kokkos build & test is clean and
|
|
||||||
changes are committed to the Kokkos clone.
|
|
||||||
|
|
||||||
3) Snapshot the current commit in the Kokkos clone into the Trilinos clone.
|
|
||||||
This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}:
|
|
||||||
${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
|
|
||||||
|
|
||||||
4) Verify the snapshot commit happened as expected
|
|
||||||
cd ${TRILINOS}/packages/kokkos
|
|
||||||
git log -1 --name-only
|
|
||||||
|
|
||||||
5) Modify, build, and test Trilinos with the Kokkos snapshot.
|
|
||||||
|
|
||||||
6) Given that the Trilinos build & test is clean and
|
|
||||||
changes are committed to the Trilinos clone.
|
|
||||||
|
|
||||||
7) Attempt push to the Kokkos repository.
|
|
||||||
If push fails then you must 'remove the Kokkos snapshot'
|
|
||||||
from your Trilinos clone.
|
|
||||||
See below.
|
|
||||||
|
|
||||||
8) Attempt to push to the Trilinos repository.
|
|
||||||
If updating for a failed push requires you to change Kokkos you must
|
|
||||||
'remove the Kokkos snapshot' from your Trilinos clone.
|
|
||||||
See below.
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
*** WHEN SOMETHING GOES WRONG AND YOU MUST ***
|
|
||||||
*** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE ***
|
|
||||||
|
|
||||||
1) Query the Trilinos clone commit log.
|
|
||||||
git log --oneline
|
|
||||||
|
|
||||||
2) Note the <SHA1> of the commit to the Trilinos clone
|
|
||||||
immediately BEFORE the Kokkos snapshot commit.
|
|
||||||
Copy this <SHA1> for use in the next command.
|
|
||||||
|
|
||||||
3) IF more than one outstanding commit then you can remove just the
|
|
||||||
Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file.
|
|
||||||
Remove or comment out the Kokkos snapshot commit entry.
|
|
||||||
git rebase -i <SHA1>
|
|
||||||
|
|
||||||
4) IF the Kokkos snapshot commit is the one and only
|
|
||||||
outstanding commit then remove just that commit.
|
|
||||||
git reset --hard HEAD~1
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
*** REGARDING 'snapshot.py' TOOL ***
|
|
||||||
|
|
||||||
The 'snapshot.py' tool is developed and maintained by the
|
|
||||||
Center for Computing Research (CCR)
|
|
||||||
Software Engineering, Maintenance, and Support (SEMS) team.
|
|
||||||
|
|
||||||
Contact Brent Perschbacher <bmpersc@sandia.gov> for questions.
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
@ -1,431 +0,0 @@
|
|||||||
# Default settings common options
|
|
||||||
|
|
||||||
#LAMMPS specific settings:
|
|
||||||
KOKKOS_PATH=../../lib/kokkos
|
|
||||||
CXXFLAGS=$(CCFLAGS)
|
|
||||||
|
|
||||||
#Options: OpenMP,Serial,Pthreads,Cuda
|
|
||||||
#KOKKOS_DEVICES ?= "OpenMP"
|
|
||||||
KOKKOS_DEVICES ?= "Pthreads"
|
|
||||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8,KNL
|
|
||||||
KOKKOS_ARCH ?= ""
|
|
||||||
#Options: yes,no
|
|
||||||
KOKKOS_DEBUG ?= "no"
|
|
||||||
#Options: hwloc,librt,experimental_memkind
|
|
||||||
KOKKOS_USE_TPLS ?= ""
|
|
||||||
#Options: c++11
|
|
||||||
KOKKOS_CXX_STANDARD ?= "c++11"
|
|
||||||
#Options: aggressive_vectorization
|
|
||||||
KOKKOS_OPTIONS ?= "aggressive_vectorization"
|
|
||||||
|
|
||||||
#Default settings specific options
|
|
||||||
#Options: force_uvm,use_ldg,rdc,enable_lambda
|
|
||||||
KOKKOS_CUDA_OPTIONS ?= ""
|
|
||||||
|
|
||||||
# Check for general settings
|
|
||||||
|
|
||||||
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_ENABLE_PROFILING_COLLECT_KERNEL_DATA := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "kernel_times" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_ENABLE_PROFILING_AGGREGATE_MPI := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "aggregate_mpi" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
|
|
||||||
|
|
||||||
# Check for external libraries
|
|
||||||
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
|
|
||||||
|
|
||||||
# Check for advanced settings
|
|
||||||
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
|
|
||||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
|
|
||||||
|
|
||||||
# Check for Kokkos Host Execution Spaces one of which must be on
|
|
||||||
|
|
||||||
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l))
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
|
|
||||||
KOKKOS_INTERNAL_USE_SERIAL := 1
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Number of lines in `$(CXX) --version` output that mention "Intel Corporation".
# Wrapped in $(strip ...) like the other wc -l probes in this file so that any
# padding emitted by wc does not defeat the later `ifeq (..., 1)` comparisons.
KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l))
|
|
||||||
# Number of lines in `$(CXX) --version` output that mention "PGI".
# Wrapped in $(strip ...) like the other wc -l probes in this file so that any
# padding emitted by wc does not defeat the later `ifeq (..., 1)` comparisons.
KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l))
|
|
||||||
# Number of lines in `$(CXX) -qversion` output that mention "XL".
# Wrapped in $(strip ...) like the other wc -l probes in this file so that any
# padding emitted by wc does not defeat the later `ifeq (..., 1)` comparisons.
KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
|
|
||||||
# Number of lines in `$(CXX) -craype-verbose` output that contain "CC-".
# Wrapped in $(strip ...) like the other wc -l probes in this file so that any
# padding emitted by wc does not defeat the later `ifeq (..., 1)` comparisons.
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
|
|
||||||
# Number of lines in `uname` output that mention "CYGWIN".
# Wrapped in $(strip ...) like the other wc -l probes in this file so that any
# padding emitted by wc does not defeat the later `ifeq (..., 1)` comparison.
KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname | grep CYGWIN | wc -l))
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
|
||||||
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
|
||||||
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
|
||||||
# OpenMP is turned on by default in Cray compiler environment
|
|
||||||
KOKKOS_INTERNAL_OPENMP_FLAG :=
|
|
||||||
else
|
|
||||||
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
|
||||||
KOKKOS_INTERNAL_CXX11_FLAG := --c++11
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
|
||||||
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
|
||||||
KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
|
|
||||||
else
|
|
||||||
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Check for other Execution Spaces
|
|
||||||
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
|
|
||||||
|
|
||||||
# Check for Kokkos Architecture settings
|
|
||||||
|
|
||||||
#Intel based
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
|
|
||||||
|
|
||||||
#NVIDIA based
|
|
||||||
NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
|
||||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
|
||||||
endif
|
|
||||||
|
|
||||||
#ARM based
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
|
|
||||||
|
|
||||||
#IBM based
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
|
|
||||||
|
|
||||||
#AMD based
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
|
||||||
|
|
||||||
#Any AVX?
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc ))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
|
|
||||||
|
|
||||||
#Incompatible flags?
|
|
||||||
# 1 when more than one host architecture family was requested in KOKKOS_ARCH.
# KOKKOS_INTERNAL_USE_ARCH_AVX is already SNB+AMDAVX, so AMDAVX must not be
# added a second time here: doing so made KOKKOS_ARCH=AMDAVX on its own sum
# to 2 and abort with "Defined Multiple Host architectures".
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
|
||||||
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
|
||||||
endif
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
|
|
||||||
$(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
|
||||||
endif
|
|
||||||
|
|
||||||
#Generating the list of Flags
|
|
||||||
|
|
||||||
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
|
|
||||||
# No warnings:
|
|
||||||
KOKKOS_CXXFLAGS =
|
|
||||||
# INTEL and CLANG warnings:
|
|
||||||
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
|
|
||||||
# GCC warnings:
|
|
||||||
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered
|
|
||||||
|
|
||||||
KOKKOS_LIBS = -lkokkos
|
|
||||||
KOKKOS_LDFLAGS = -L$(shell pwd)
|
|
||||||
KOKKOS_SRC =
|
|
||||||
KOKKOS_HEADERS =
|
|
||||||
|
|
||||||
#Generating the KokkosCore_config.h file
|
|
||||||
|
|
||||||
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
|
|
||||||
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
|
|
||||||
tmp := $(shell date >> KokkosCore_config.tmp)
|
|
||||||
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
|
|
||||||
|
|
||||||
|
|
||||||
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
|
||||||
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
|
||||||
KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include
|
|
||||||
KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
|
||||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -G
|
|
||||||
endif
|
|
||||||
KOKKOS_CXXFLAGS += -g
|
|
||||||
KOKKOS_LDFLAGS += -g -ldl
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
|
||||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
|
||||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
|
||||||
KOKKOS_LIBS += -lhwloc
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
|
|
||||||
tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
|
|
||||||
tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
|
|
||||||
KOKKOS_LIBS += -lrt
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
|
||||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
|
||||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
|
||||||
KOKKOS_LIBS += -lmemkind
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
|
|
||||||
KOKKOS_CXXFLAGS += --relocatable-device-code=true
|
|
||||||
KOKKOS_LDFLAGS += --relocatable-device-code=true
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
|
|
||||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
|
||||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
|
||||||
endif
|
|
||||||
|
|
||||||
#Add Architecture flags
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
|
||||||
KOKKOS_CXXFLAGS +=
|
|
||||||
KOKKOS_LDFLAGS +=
|
|
||||||
else
|
|
||||||
KOKKOS_CXXFLAGS += -mavx
|
|
||||||
KOKKOS_LDFLAGS += -mavx
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -mcpu=power8
|
|
||||||
KOKKOS_LDFLAGS += -mcpu=power8
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
|
||||||
KOKKOS_LDFLAGS += -xCORE-AVX2
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
|
||||||
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
|
||||||
|
|
||||||
else
|
|
||||||
# Assume that this really is a GNU compiler
|
|
||||||
KOKKOS_CXXFLAGS += -march=core-avx2
|
|
||||||
KOKKOS_LDFLAGS += -march=core-avx2
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -xMIC-AVX512
|
|
||||||
KOKKOS_LDFLAGS += -xMIC-AVX512
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
|
||||||
|
|
||||||
else
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
|
||||||
|
|
||||||
else
|
|
||||||
# Assume that this really is a GNU compiler
|
|
||||||
KOKKOS_CXXFLAGS += -march=knl
|
|
||||||
KOKKOS_LDFLAGS += -march=knl
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -mmic
|
|
||||||
KOKKOS_LDFLAGS += -mmic
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -arch=sm_30
|
|
||||||
endif
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -arch=sm_32
|
|
||||||
endif
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -arch=sm_35
|
|
||||||
endif
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -arch=sm_37
|
|
||||||
endif
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -arch=sm_50
|
|
||||||
endif
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -arch=sm_52
|
|
||||||
endif
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -arch=sm_53
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Expands to "KokkosCore_config.h" when a previously generated header exists in
# the current directory, and to nothing otherwise. $(wildcard) is used instead
# of `ls` so a first build does not print a "No such file" error and no
# subprocess is spawned.
KOKKOS_INTERNAL_LS_CONFIG := $(wildcard KokkosCore_config.h)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
|
|
||||||
KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
|
|
||||||
else
|
|
||||||
KOKKOS_INTERNAL_NEW_CONFIG := 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
|
||||||
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
|
|
||||||
endif
|
|
||||||
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
|
|
||||||
|
|
||||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
|
|
||||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
|
||||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
|
||||||
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
|
||||||
KOKKOS_LIBS += -lcudart -lcuda
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
|
||||||
KOKKOS_LIBS += -lpthread
|
|
||||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
|
||||||
KOKKOS_LIBS += -lqthread
|
|
||||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
|
||||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
|
|
||||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
|
||||||
else
|
|
||||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
|
||||||
endif
|
|
||||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
|
||||||
endif
|
|
||||||
|
|
||||||
#With Cygwin functions such as fdopen and fileno are not defined
|
|
||||||
#when strict ansi is enabled. strict ansi gets enabled with --std=c++11
|
|
||||||
#though. So we hard undefine it here. Not sure if that has any bad side effects
|
|
||||||
#This is needed for gtest actually, not for Kokkos itself!
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
|
|
||||||
KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Setting up dependencies
|
|
||||||
|
|
||||||
KokkosCore_config.h:
|
|
||||||
|
|
||||||
KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)
|
|
||||||
|
|
||||||
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
|
|
||||||
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
|
|
||||||
|
|
||||||
include $(KOKKOS_PATH)/Makefile.targets
|
|
||||||
|
|
||||||
kokkos-clean:
|
|
||||||
rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
|
|
||||||
|
|
||||||
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
|
|
||||||
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
|
|
||||||
ranlib libkokkos.a
|
|
||||||
|
|
||||||
KOKKOS_LINK_DEPENDS=libkokkos.a
|
|
||||||
@ -1,62 +0,0 @@
|
|||||||
Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
|
|
||||||
Kokkos_AllocationTracker.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
|
|
||||||
Kokkos_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
|
|
||||||
Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
|
|
||||||
Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
|
|
||||||
Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
|
|
||||||
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
|
|
||||||
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
|
|
||||||
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
|
|
||||||
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
|
|
||||||
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
|
||||||
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
|
||||||
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
|
||||||
Kokkos_Cuda_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
|
|
||||||
Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
|
|
||||||
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
|
||||||
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
|
|
||||||
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
|
||||||
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
|
||||||
Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
|
|
||||||
Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
|
||||||
Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
|
|
||||||
endif
|
|
||||||
|
|
||||||
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
|
||||||
Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
|
|
||||||
|
|
||||||
@ -1,134 +0,0 @@
|
|||||||
Kokkos implements a programming model in C++ for writing performance portable
|
|
||||||
applications targeting all major HPC platforms. For that purpose it provides
|
|
||||||
abstractions for both parallel execution of code and data management.
|
|
||||||
Kokkos is designed to target complex node architectures with N-level memory
|
|
||||||
hierarchies and multiple types of execution resources. It currently can use
|
|
||||||
OpenMP, Pthreads and CUDA as backend programming models.
|
|
||||||
|
|
||||||
The core developers of Kokkos are Carter Edwards and Christian Trott
|
|
||||||
at the Computer Science Research Institute of the Sandia National
|
|
||||||
Laboratories.
|
|
||||||
|
|
||||||
The KokkosP interface and associated tools are developed by the Application
|
|
||||||
Performance Team and Kokkos core developers at Sandia National Laboratories.
|
|
||||||
|
|
||||||
To learn more about Kokkos consider watching one of our presentations:
|
|
||||||
GTC 2015:
|
|
||||||
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
|
|
||||||
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
|
|
||||||
|
|
||||||
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
|
|
||||||
and feedback is greatly appreciated.
|
|
||||||
|
|
||||||
A separate repository with extensive tutorial material can be found under
|
|
||||||
https://github.com/kokkos/kokkos-tutorials.
|
|
||||||
|
|
||||||
If you have a patch to contribute please feel free to issue a pull request against
|
|
||||||
the develop branch. For major contributions it is better to contact us first
|
|
||||||
for guidance.
|
|
||||||
|
|
||||||
For questions please send an email to
|
|
||||||
kokkos-users@software.sandia.gov
|
|
||||||
|
|
||||||
For non-public questions send an email to
|
|
||||||
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
|
|
||||||
|
|
||||||
============================================================================
|
|
||||||
====Requirements============================================================
|
|
||||||
============================================================================
|
|
||||||
|
|
||||||
Primary tested compilers are:
|
|
||||||
GCC 4.7.2
|
|
||||||
GCC 4.8.4
|
|
||||||
GCC 4.9.2
|
|
||||||
GCC 5.1.0
|
|
||||||
Intel 14.0.4
|
|
||||||
Intel 15.0.2
|
|
||||||
Clang 3.5.2
|
|
||||||
Clang 3.6.1
|
|
||||||
|
|
||||||
Secondary tested compilers are:
|
|
||||||
CUDA 6.5 (with gcc 4.7.2)
|
|
||||||
CUDA 7.0 (with gcc 4.7.2)
|
|
||||||
CUDA 7.5 (with gcc 4.7.2)
|
|
||||||
|
|
||||||
Other compilers working:
|
|
||||||
PGI 15.4
|
|
||||||
IBM XL 13.1.2
|
|
||||||
Cygwin 2.1.0 64bit with gcc 4.9.3
|
|
||||||
|
|
||||||
Primary tested compilers are passing in release mode
|
|
||||||
with warnings as errors. We are using the following set
|
|
||||||
of flags:
|
|
||||||
GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
|
|
||||||
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
|
|
||||||
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
|
|
||||||
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
|
|
||||||
|
|
||||||
Secondary compilers are passing without -Werror.
|
|
||||||
Other compilers are tested occasionally.
|
|
||||||
|
|
||||||
============================================================================
|
|
||||||
====Getting started=========================================================
|
|
||||||
============================================================================
|
|
||||||
|
|
||||||
In the 'example/tutorial' directory you will find step by step tutorial
|
|
||||||
examples which explain many of the features of Kokkos. They work with
|
|
||||||
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
|
|
||||||
in the 'example/tutorial' directory. This will build all examples in the
|
|
||||||
subfolders.
|
|
||||||
|
|
||||||
============================================================================
|
|
||||||
====Running Unit Tests======================================================
|
|
||||||
============================================================================
|
|
||||||
|
|
||||||
To run the unit tests create a build directory and run the following commands
|
|
||||||
|
|
||||||
KOKKOS_PATH/generate_makefile.bash
|
|
||||||
make build-test
|
|
||||||
make test
|
|
||||||
|
|
||||||
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
|
|
||||||
changing the device type for which to build.
|
|
||||||
|
|
||||||
============================================================================
|
|
||||||
====Install the library=====================================================
|
|
||||||
============================================================================
|
|
||||||
|
|
||||||
To install Kokkos as a library create a build directory and run the following commands:
|
|
||||||
|
|
||||||
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
|
|
||||||
make lib
|
|
||||||
make install
|
|
||||||
|
|
||||||
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
|
|
||||||
changing the device type for which to build.
|
|
||||||
|
|
||||||
============================================================================
|
|
||||||
====CMakeFiles==============================================================
|
|
||||||
============================================================================
|
|
||||||
|
|
||||||
The CMake files contained in this repository require Tribits and are used
|
|
||||||
for integration with Trilinos. They do not currently support a standalone
|
|
||||||
CMake build.
|
|
||||||
|
|
||||||
===========================================================================
|
|
||||||
====Kokkos and CUDA UVM====================================================
|
|
||||||
===========================================================================
|
|
||||||
|
|
||||||
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
|
|
||||||
Allocations made with that space are accessible from host and device.
|
|
||||||
You can tell Kokkos to use that as the default space for Cuda allocations.
|
|
||||||
In either case UVM comes with a number of restrictions:
|
|
||||||
(i) You can't access allocations on the host while a kernel is potentially
|
|
||||||
running. This will lead to segfaults. To avoid that you either need to
|
|
||||||
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
|
|
||||||
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
|
|
||||||
Furthermore in multi socket multi GPU machines, UVM defaults to using
|
|
||||||
zero copy allocations for technical reasons related to using multiple
|
|
||||||
GPUs from the same process. If an executable doesn't do that (e.g. each
|
|
||||||
MPI rank of an application uses a single GPU [can be the same GPU for
|
|
||||||
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
|
|
||||||
This will enforce proper UVM allocations, but can lead to errors if
|
|
||||||
more than a single GPU is used by a single process.
|
|
||||||
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
|
|
||||||
|
|
||||||
TRIBITS_SUBPACKAGE(Algorithms)
|
|
||||||
|
|
||||||
ADD_SUBDIRECTORY(src)
|
|
||||||
|
|
||||||
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
|
|
||||||
#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)
|
|
||||||
|
|
||||||
TRIBITS_SUBPACKAGE_POSTPROCESS()
|
|
||||||
@ -1,5 +0,0 @@
|
|||||||
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
|
|
||||||
LIB_REQUIRED_PACKAGES KokkosCore
|
|
||||||
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC
|
|
||||||
TEST_OPTIONAL_TPLS CUSPARSE
|
|
||||||
)
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
#ifndef KOKKOS_ALGORITHMS_CONFIG_H
|
|
||||||
#define KOKKOS_ALGORITHMS_CONFIG_H
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@ -1,21 +0,0 @@
|
|||||||
|
|
||||||
TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
|
|
||||||
|
|
||||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
|
||||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
FILE(GLOB HEADERS *.hpp)
|
|
||||||
FILE(GLOB SOURCES *.cpp)
|
|
||||||
LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
TRIBITS_ADD_LIBRARY(
|
|
||||||
kokkosalgorithms
|
|
||||||
HEADERS ${HEADERS}
|
|
||||||
SOURCES ${SOURCES}
|
|
||||||
DEPLIBS
|
|
||||||
)
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,496 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef KOKKOS_SORT_HPP_
|
|
||||||
#define KOKKOS_SORT_HPP_
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
namespace SortImpl {

// CopyOp copies one entry along the leading dimension of a view:
// everything stored at leading index i_src of `src` is written to leading
// index i_dst of `dst`. Only the specializations for rank 1, 2 and 3 below
// are defined; the primary template is declared but never defined.
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
struct CopyOp;

// Rank 1: a single element is copied.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,1> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    dst(i_dst) = src(i_src);
  }
};

// Rank 2: the whole row (all entries of dimension 1) is copied.
// The extent is taken from `dst`.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,2> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    const int n1 = (int) dst.dimension_1();
    for(int j = 0; j < n1; j++)
      dst(i_dst,j) = src(i_src,j);
  }
};

// Rank 3: the whole 2-D slab (dimensions 1 and 2) is copied.
// The extents are taken from `dst` and cast to int exactly like the rank-2
// case, so the loop conditions do not mix signed and unsigned operands.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,3> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    const int n1 = (int) dst.dimension_1();
    const int n2 = (int) dst.dimension_2();
    for(int j = 0; j < n1; j++)
      for(int k = 0; k < n2; k++)
        dst(i_dst,j,k) = src(i_src,j,k);
  }
};
}
|
|
||||||
|
|
||||||
template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space,
|
|
||||||
class SizeType = typename KeyViewType::memory_space::size_type>
|
|
||||||
class BinSort {
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
|
||||||
template<class ValuesViewType, class PermuteViewType, class CopyOp>
|
|
||||||
struct bin_sort_sort_functor {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef typename ValuesViewType::non_const_type values_view_type;
|
|
||||||
typedef typename ValuesViewType::const_type const_values_view_type;
|
|
||||||
Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout,
|
|
||||||
typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values;
|
|
||||||
values_view_type sorted_values;
|
|
||||||
typename PermuteViewType::const_type sort_order;
|
|
||||||
bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_):
|
|
||||||
values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int& i) const {
|
|
||||||
//printf("Sort: %i %i\n",i,sort_order(i));
|
|
||||||
CopyOp::copy(sorted_values,i,values,sort_order(i));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef BinSortOp bin_op_type;
|
|
||||||
|
|
||||||
struct bin_count_tag {};
|
|
||||||
struct bin_offset_tag {};
|
|
||||||
struct bin_binning_tag {};
|
|
||||||
struct bin_sort_bins_tag {};
|
|
||||||
|
|
||||||
public:
|
|
||||||
typedef SizeType size_type;
|
|
||||||
typedef size_type value_type;
|
|
||||||
|
|
||||||
typedef Kokkos::View<size_type*, execution_space> offset_type;
|
|
||||||
typedef Kokkos::View<const int*, execution_space> bin_count_type;
|
|
||||||
|
|
||||||
|
|
||||||
typedef Kokkos::View<typename KeyViewType::const_data_type,
|
|
||||||
typename KeyViewType::array_layout,
|
|
||||||
typename KeyViewType::memory_space> const_key_view_type;
|
|
||||||
typedef Kokkos::View<typename KeyViewType::const_data_type,
|
|
||||||
typename KeyViewType::array_layout,
|
|
||||||
typename KeyViewType::memory_space,
|
|
||||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type;
|
|
||||||
|
|
||||||
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
|
|
||||||
typedef typename KeyViewType::const_value_type const_key_scalar;
|
|
||||||
|
|
||||||
private:
|
|
||||||
const_key_view_type keys;
|
|
||||||
const_rnd_key_view_type keys_rnd;
|
|
||||||
|
|
||||||
public:
|
|
||||||
BinSortOp bin_op;
|
|
||||||
|
|
||||||
offset_type bin_offsets;
|
|
||||||
|
|
||||||
Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic;
|
|
||||||
bin_count_type bin_count_const;
|
|
||||||
|
|
||||||
offset_type sort_order;
|
|
||||||
|
|
||||||
bool sort_within_bins;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
// Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
|
|
||||||
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
|
|
||||||
bool sort_within_bins_ = false)
|
|
||||||
:keys(keys_),keys_rnd(keys_), bin_op(bin_op_) {
|
|
||||||
|
|
||||||
bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
|
|
||||||
bin_count_const = bin_count_atomic;
|
|
||||||
bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
|
|
||||||
sort_order = offset_type("PermutationVector",keys.dimension_0());
|
|
||||||
sort_within_bins = sort_within_bins_;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
|
|
||||||
void create_permute_vector() {
|
|
||||||
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this);
|
|
||||||
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
|
|
||||||
|
|
||||||
Kokkos::deep_copy(bin_count_atomic,0);
|
|
||||||
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this);
|
|
||||||
|
|
||||||
if(sort_within_bins)
|
|
||||||
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort a view with respect ot the first dimension using the permutation array
|
|
||||||
template<class ValuesViewType>
|
|
||||||
void sort(ValuesViewType values) {
|
|
||||||
ValuesViewType sorted_values = ValuesViewType("Copy",
|
|
||||||
values.dimension_0(),
|
|
||||||
values.dimension_1(),
|
|
||||||
values.dimension_2(),
|
|
||||||
values.dimension_3(),
|
|
||||||
values.dimension_4(),
|
|
||||||
values.dimension_5(),
|
|
||||||
values.dimension_6(),
|
|
||||||
values.dimension_7());
|
|
||||||
|
|
||||||
parallel_for(values.dimension_0(),
|
|
||||||
bin_sort_sort_functor<ValuesViewType, offset_type,
|
|
||||||
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
|
|
||||||
|
|
||||||
deep_copy(values,sorted_values);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the permutation vector
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
offset_type get_permute_vector() const { return sort_order;}
|
|
||||||
|
|
||||||
// Get the start offsets for each bin
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
offset_type get_bin_offsets() const { return bin_offsets;}
|
|
||||||
|
|
||||||
// Get the count for each bin
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
bin_count_type get_bin_count() const {return bin_count_const;}
|
|
||||||
|
|
||||||
public:
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const bin_count_tag& tag, const int& i) const {
|
|
||||||
bin_count_atomic(bin_op.bin(keys,i))++;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const {
|
|
||||||
if(final) {
|
|
||||||
bin_offsets(i) = offset;
|
|
||||||
}
|
|
||||||
offset+=bin_count_const(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const bin_binning_tag& tag, const int& i) const {
|
|
||||||
const int bin = bin_op.bin(keys,i);
|
|
||||||
const int count = bin_count_atomic(bin)++;
|
|
||||||
|
|
||||||
sort_order(bin_offsets(bin) + count) = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const bin_sort_bins_tag& tag, const int&i ) const {
|
|
||||||
bool sorted = false;
|
|
||||||
int upper_bound = bin_offsets(i)+bin_count_const(i);
|
|
||||||
while(!sorted) {
|
|
||||||
sorted = true;
|
|
||||||
int old_idx = sort_order(bin_offsets(i));
|
|
||||||
int new_idx;
|
|
||||||
for(int k=bin_offsets(i)+1; k<upper_bound; k++) {
|
|
||||||
new_idx = sort_order(k);
|
|
||||||
|
|
||||||
if(!bin_op(keys_rnd,old_idx,new_idx)) {
|
|
||||||
sort_order(k-1) = new_idx;
|
|
||||||
sort_order(k) = old_idx;
|
|
||||||
sorted = false;
|
|
||||||
} else {
|
|
||||||
old_idx = new_idx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
upper_bound--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace SortImpl {
|
|
||||||
|
|
||||||
// Default binning operator for 1-D keys: maps a key linearly from the
// interval [min, max] onto bin indices 0 .. max_bins__ (max_bins__+1 bins).
template<class KeyViewType>
struct DefaultBinOp1D {
  const int max_bins_;   // number of bins (requested count + 1)
  const double mul_;     // scale factor from key offset to bin index
  typename KeyViewType::const_value_type range_;
  typename KeyViewType::const_value_type min_;

  //Construct BinOp with number of bins, minimum value and maximum value.
  //If max == min the scale factor is set to 0 (instead of dividing by zero),
  //so every key is placed in bin 0.
  DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
                 typename KeyViewType::const_value_type max )
    :max_bins_(max_bins__+1),
     mul_((max-min) != 0 ? 1.0*max_bins__/(max-min) : 0.0),
     range_(max-min),
     min_(min) {}

  //Determine bin index from key value
  template<class ViewType>
  KOKKOS_INLINE_FUNCTION
  int bin(ViewType& keys, const int& i) const {
    return int(mul_*(keys(i)-min_));
  }

  //Return maximum bin index + 1
  KOKKOS_INLINE_FUNCTION
  int max_bins() const {
    return max_bins_;
  }

  //Compare two keys within a bin: returns true if keys(i1) < keys(i2)
  template<class ViewType, typename iType1, typename iType2>
  KOKKOS_INLINE_FUNCTION
  bool operator()(ViewType& keys, iType1& i1, iType2& i2) const {
    return keys(i1)<keys(i2);
  }
};
|
|
||||||
|
|
||||||
// Default binning operator for keys with three components per entry
// (accessed as keys(i,0), keys(i,1), keys(i,2)). Each component is mapped
// linearly from [min[d], max[d]] onto max_bins__[d]+1 bins and the three
// per-dimension indices are flattened into one bin index.
template<class KeyViewType>
struct DefaultBinOp3D {
  int max_bins_[3];   // bins per dimension (requested count + 1)
  double mul_[3];     // per-dimension scale factor from key offset to bin index
  typename KeyViewType::non_const_value_type range_[3];
  typename KeyViewType::non_const_value_type min_[3];

  // Construct from per-dimension bin counts, minima and maxima (arrays of
  // length 3). A dimension with max == min gets scale factor 0 (instead of
  // a division by zero), so all keys use index 0 in that dimension.
  DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
                 typename KeyViewType::const_value_type max[] )
  {
    for(int d = 0; d < 3; d++) {
      max_bins_[d] = max_bins__[d]+1;
      mul_[d] = (max[d]-min[d]) != 0 ? 1.0*max_bins__[d]/(max[d]-min[d]) : 0.0;
      range_[d] = max[d]-min[d];
      min_[d] = min[d];
    }
  }

  // Determine the flattened bin index of key i:
  // (b0*max_bins_[1] + b1)*max_bins_[2] + b2
  template<class ViewType>
  KOKKOS_INLINE_FUNCTION
  int bin(ViewType& keys, const int& i) const {
    const int b0 = int(mul_[0]*(keys(i,0)-min_[0]));
    const int b1 = int(mul_[1]*(keys(i,1)-min_[1]));
    const int b2 = int(mul_[2]*(keys(i,2)-min_[2]));
    return int( ((b0*max_bins_[1]) + b1)*max_bins_[2] + b2 );
  }

  // Return the total number of bins
  KOKKOS_INLINE_FUNCTION
  int max_bins() const {
    return max_bins_[0]*max_bins_[1]*max_bins_[2];
  }

  // Lexicographic comparison of two keys: returns true if key i1 is
  // strictly greater than key i2, comparing component 0 first, then 1,
  // then 2. Each component is compared against the SAME component of the
  // other key.
  template<class ViewType, typename iType1, typename iType2>
  KOKKOS_INLINE_FUNCTION
  bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const {
    if (keys(i1,0)>keys(i2,0)) return true;
    else if (keys(i1,0)==keys(i2,0)) {
      if (keys(i1,1)>keys(i2,1)) return true;
      else if (keys(i1,1)==keys(i2,1)) {
        if (keys(i1,2)>keys(i2,2)) return true;
      }
    }
    return false;
  }
};
|
|
||||||
|
|
||||||
// Reduction value that tracks the minimum and maximum of the scalars added
// to it. `init` is false until the first value has been added; until then
// min and max are just 0 placeholders and are ignored when merging.
template<typename Scalar>
struct min_max {
  Scalar min;
  Scalar max;
  bool init;

  // Start empty: no value seen yet.
  KOKKOS_INLINE_FUNCTION
  min_max() {
    min = 0;
    max = 0;
    init = false;
  }

  KOKKOS_INLINE_FUNCTION
  min_max (const min_max& val) {
    min = val.min;
    max = val.max;
    init = val.init;
  }

  // Returns a reference to *this (no temporary copy is made).
  KOKKOS_INLINE_FUNCTION
  min_max& operator = (const min_max& val) {
    min = val.min;
    max = val.max;
    init = val.init;
    return *this;
  }

  // Add a single scalar.
  KOKKOS_INLINE_FUNCTION
  void operator+= (const Scalar& val) {
    if(init) {
      min = min<val?min:val;
      max = max>val?max:val;
    } else {
      // first value seen: it is both the minimum and the maximum
      min = val;
      max = val;
      init = true;
    }
  }

  // Merge another min_max; an uninitialized operand contributes nothing.
  KOKKOS_INLINE_FUNCTION
  void operator+= (const min_max& val) {
    if(init && val.init) {
      min = min<val.min?min:val.min;
      max = max>val.max?max:val.max;
    } else {
      if(val.init) {
        min = val.min;
        max = val.max;
        init = true;
      }
    }
  }

  // Volatile overload of the scalar add (same logic as above).
  KOKKOS_INLINE_FUNCTION
  void operator+= (volatile const Scalar& val) volatile {
    if(init) {
      min = min<val?min:val;
      max = max>val?max:val;
    } else {
      min = val;
      max = val;
      init = true;
    }
  }

  // Volatile overload of the merge (same logic as above).
  KOKKOS_INLINE_FUNCTION
  void operator+= (volatile const min_max& val) volatile {
    if(init && val.init) {
      min = min<val.min?min:val.min;
      max = max>val.max?max:val.max;
    } else {
      if(val.init) {
        min = val.min;
        max = val.max;
        init = true;
      }
    }
  }
};
|
|
||||||
|
|
||||||
|
|
||||||
template<class ViewType>
|
|
||||||
struct min_max_functor {
|
|
||||||
typedef typename ViewType::execution_space execution_space;
|
|
||||||
ViewType view;
|
|
||||||
typedef min_max<typename ViewType::non_const_value_type> value_type;
|
|
||||||
min_max_functor (const ViewType view_):view(view_) {
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(const size_t& i, value_type& val) const {
|
|
||||||
val += view(i);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType>
|
|
||||||
bool try_std_sort(ViewType view) {
|
|
||||||
bool possible = true;
|
|
||||||
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
|
||||||
size_t stride[8];
|
|
||||||
view.stride(stride);
|
|
||||||
#else
|
|
||||||
size_t stride[8] = { view.stride_0()
|
|
||||||
, view.stride_1()
|
|
||||||
, view.stride_2()
|
|
||||||
, view.stride_3()
|
|
||||||
, view.stride_4()
|
|
||||||
, view.stride_5()
|
|
||||||
, view.stride_6()
|
|
||||||
, view.stride_7()
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
|
|
||||||
possible = possible && (ViewType::Rank == 1);
|
|
||||||
possible = possible && (stride[0] == 1);
|
|
||||||
if(possible) {
|
|
||||||
std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0());
|
|
||||||
}
|
|
||||||
return possible;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class ViewType>
|
|
||||||
void sort(ViewType view, bool always_use_kokkos_sort = false) {
|
|
||||||
if(!always_use_kokkos_sort) {
|
|
||||||
if(SortImpl::try_std_sort(view)) return;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
|
|
||||||
SortImpl::min_max<typename ViewType::non_const_value_type> val;
|
|
||||||
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
|
|
||||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
|
|
||||||
bin_sort.create_permute_vector();
|
|
||||||
bin_sort.sort(view);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*template<class ViewType, class Comparator>
|
|
||||||
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
|
|
||||||
|
|
||||||
}*/
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@ -1,38 +0,0 @@
|
|||||||
|
|
||||||
# Build configuration for the unit-test executable of this directory.

# Header search path: build directory, this directory, then the sibling src.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../src
)

# Sources that are always compiled.
SET(SOURCES UnitTestMain.cpp TestCuda.cpp)

SET(LIBRARIES kokkoscore)

# One additional test source per enabled host backend.
IF(Kokkos_ENABLE_OpenMP)
  LIST(APPEND SOURCES TestOpenMP.cpp)
ENDIF()

IF(Kokkos_ENABLE_Serial)
  LIST(APPEND SOURCES TestSerial.cpp)
ENDIF()

IF(Kokkos_ENABLE_Pthread)
  LIST(APPEND SOURCES TestThreads.cpp)
ENDIF()

# Register the executable and its test; output matching the
# FAIL_REGULAR_EXPRESSION marks the test as failed.
TRIBITS_ADD_EXECUTABLE_AND_TEST(
  UnitTest
  SOURCES ${SOURCES}
  COMM serial mpi
  NUM_MPI_PROCS 1
  FAIL_REGULAR_EXPRESSION " FAILED "
  TESTONLYLIBS kokkos_gtest
)
|
|
||||||
@ -1,92 +0,0 @@
|
|||||||
# Makefile for the Kokkos algorithms unit tests.
#
# Builds one gtest executable per enabled backend (selected through the
# KOKKOS_INTERNAL_USE_* variables) and provides a test-<backend> target that
# runs it. Recipe lines must start with a TAB character.

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests

# Must stay the first rule in this file so that it is the default goal.
default: build_all
	echo "End Build"

include $(KOKKOS_PATH)/Makefile.kokkos

# These targets do not name files; declaring them phony keeps a stray file
# called e.g. "test" or "clean" from silently disabling the rule.
.PHONY: default build_all test test-cuda test-threads test-openmp test-serial clean

# Compiler and linker selection: the CUDA build forces the nvcc wrapper,
# every other build only supplies defaults that the user may override.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = $(NVCC_WRAPPER)
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests

# Filled in below, one entry per enabled backend.
TEST_TARGETS =
TARGETS =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_Cuda
  TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_Threads
  TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_OpenMP
  TEST_TARGETS += test-openmp
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_Serial
  TEST_TARGETS += test-serial
endif

# Link rules, one executable per backend.

KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda

KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads

KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP

KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial

# Run rules: build the executable if needed, then execute it.

test-cuda: KokkosAlgorithms_UnitTest_Cuda
	./KokkosAlgorithms_UnitTest_Cuda

test-threads: KokkosAlgorithms_UnitTest_Threads
	./KokkosAlgorithms_UnitTest_Threads

test-openmp: KokkosAlgorithms_UnitTest_OpenMP
	./KokkosAlgorithms_UnitTest_OpenMP

test-serial: KokkosAlgorithms_UnitTest_Serial
	./KokkosAlgorithms_UnitTest_Serial

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

# gtest is compiled from its single amalgamated source file.
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
|
||||||
|
|
||||||
@ -1,110 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#ifdef KOKKOS_HAVE_CUDA
|
|
||||||
|
|
||||||
#include <TestRandom.hpp>
|
|
||||||
#include <TestSort.hpp>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
class cuda : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
Kokkos::HostSpace::execution_space::initialize();
|
|
||||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
|
||||||
}
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::Cuda::finalize();
|
|
||||||
Kokkos::HostSpace::execution_space::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
void cuda_test_random_xorshift64( int num_draws )
|
|
||||||
{
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_random_xorshift1024( int num_draws )
|
|
||||||
{
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#define CUDA_RANDOM_XORSHIFT64( num_draws ) \
|
|
||||||
TEST_F( cuda, Random_XorShift64 ) { \
|
|
||||||
cuda_test_random_xorshift64(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \
|
|
||||||
TEST_F( cuda, Random_XorShift1024 ) { \
|
|
||||||
cuda_test_random_xorshift1024(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_SORT_UNSIGNED( size ) \
|
|
||||||
TEST_F( cuda, SortUnsigned ) { \
|
|
||||||
Impl::test_sort< Kokkos::Cuda, unsigned >(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
CUDA_RANDOM_XORSHIFT64( 132141141 )
|
|
||||||
CUDA_RANDOM_XORSHIFT1024( 52428813 )
|
|
||||||
CUDA_SORT_UNSIGNED(171)
|
|
||||||
|
|
||||||
#undef CUDA_RANDOM_XORSHIFT64
|
|
||||||
#undef CUDA_RANDOM_XORSHIFT1024
|
|
||||||
#undef CUDA_SORT_UNSIGNED
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
|
||||||
|
|
||||||
@ -1,102 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
#include <TestRandom.hpp>
|
|
||||||
#include <TestSort.hpp>
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
#ifdef KOKKOS_HAVE_OPENMP
|
|
||||||
class openmp : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
|
|
||||||
unsigned threads_count = omp_get_max_threads();
|
|
||||||
|
|
||||||
if ( Kokkos::hwloc::available() ) {
|
|
||||||
threads_count = Kokkos::hwloc::get_available_numa_count() *
|
|
||||||
Kokkos::hwloc::get_available_cores_per_numa();
|
|
||||||
}
|
|
||||||
|
|
||||||
Kokkos::OpenMP::initialize( threads_count );
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::OpenMP::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \
|
|
||||||
TEST_F( openmp, Random_XorShift64 ) { \
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \
|
|
||||||
TEST_F( openmp, Random_XorShift1024 ) { \
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_SORT_UNSIGNED( size ) \
|
|
||||||
TEST_F( openmp, SortUnsigned ) { \
|
|
||||||
Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
OPENMP_RANDOM_XORSHIFT64( 10240000 )
|
|
||||||
OPENMP_RANDOM_XORSHIFT1024( 10130144 )
|
|
||||||
OPENMP_SORT_UNSIGNED(171)
|
|
||||||
|
|
||||||
#undef OPENMP_RANDOM_XORSHIFT64
|
|
||||||
#undef OPENMP_RANDOM_XORSHIFT1024
|
|
||||||
#undef OPENMP_SORT_UNSIGNED
|
|
||||||
#endif
|
|
||||||
} // namespace test
|
|
||||||
|
|
||||||
@ -1,476 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_DUALVIEW_HPP
|
|
||||||
#define KOKKOS_TEST_DUALVIEW_HPP
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
#include <Kokkos_Random.hpp>
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl{
|
|
||||||
|
|
||||||
// This test runs the random number generators and uses some statistic tests to
|
|
||||||
// check the 'goodness' of the random numbers:
|
|
||||||
// (i) mean: the mean is expected to be 0.5*RAND_MAX
|
|
||||||
// (ii) variance: the variance is 1/3*mean*mean
|
|
||||||
// (iii) covariance: the covariance is 0
|
|
||||||
// (iv) 1-tupledistr: the mean, variance and covariance of a 1D Histogram of random numbers
|
|
||||||
// (v) 3-tupledistr: the mean, variance and covariance of a 3D Histogram of random numbers
|
|
||||||
|
|
||||||
// Number of bins along each axis of the 3-D histogram.
#define HIST_DIM3D 24
|
|
||||||
// Number of bins of the 1-D histogram: the cube of HIST_DIM3D, i.e. as many
// bins as the 3-D histogram has in total.
#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D)
|
|
||||||
|
|
||||||
// Accumulator for the statistics gathered by the random-number tests.
//
// count, mean, variance and covariance are plain running sums; min and max
// track the smallest and largest value merged so far. A fresh object starts
// with all sums at zero and min/max at +1e64/-1e64, so the first merged
// value replaces both.
struct RandomProperties {
  uint64_t count;
  double mean;
  double variance;
  double covariance;
  double min;
  double max;

  KOKKOS_INLINE_FUNCTION
  RandomProperties()
    : count(0)
    , mean(0.0)
    , variance(0.0)
    , covariance(0.0)
    , min(1e64)
    , max(-1e64)
  {}

  // Merge another accumulator: add the sums, widen the [min, max] range.
  KOKKOS_INLINE_FUNCTION
  RandomProperties& operator+=(const RandomProperties& rhs) {
    count      += rhs.count;
    mean       += rhs.mean;
    variance   += rhs.variance;
    covariance += rhs.covariance;
    if (rhs.min < min) min = rhs.min;
    if (rhs.max > max) max = rhs.max;
    return *this;
  }

  // Volatile counterpart of the merge above. The conditional expressions are
  // kept as written so that the volatile reads and writes stay the same.
  KOKKOS_INLINE_FUNCTION
  void operator+=(const volatile RandomProperties& rhs) volatile {
    count      += rhs.count;
    mean       += rhs.mean;
    variance   += rhs.variance;
    covariance += rhs.covariance;
    min = rhs.min<min?rhs.min:min;
    max = rhs.max>max?rhs.max:max;
  }
};
|
|
||||||
|
|
||||||
template<class GeneratorPool, class Scalar>
|
|
||||||
struct test_random_functor {
|
|
||||||
typedef typename GeneratorPool::generator_type rnd_type;
|
|
||||||
|
|
||||||
typedef RandomProperties value_type;
|
|
||||||
typedef typename GeneratorPool::device_type device_type;
|
|
||||||
|
|
||||||
GeneratorPool rand_pool;
|
|
||||||
const double mean;
|
|
||||||
|
|
||||||
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
|
||||||
// an exclusive upper bound on the range of random numbers that
|
|
||||||
// draw() can generate. However, for the float specialization, some
|
|
||||||
// implementations might violate this upper bound, due to rounding
|
|
||||||
// error. Just in case, we leave an extra space at the end of each
|
|
||||||
// dimension, in the View types below.
|
|
||||||
typedef Kokkos::View<int[HIST_DIM1D+1],typename GeneratorPool::device_type> type_1d;
|
|
||||||
type_1d density_1d;
|
|
||||||
typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1],typename GeneratorPool::device_type> type_3d;
|
|
||||||
type_3d density_3d;
|
|
||||||
|
|
||||||
test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) :
|
|
||||||
rand_pool (rand_pool_),
|
|
||||||
mean (0.5*Kokkos::rand<rnd_type,Scalar>::max ()),
|
|
||||||
density_1d (d1d),
|
|
||||||
density_3d (d3d)
|
|
||||||
{}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (int i, RandomProperties& prop) const {
|
|
||||||
using Kokkos::atomic_fetch_add;
|
|
||||||
|
|
||||||
rnd_type rand_gen = rand_pool.get_state();
|
|
||||||
for (int k = 0; k < 1024; ++k) {
|
|
||||||
const Scalar tmp = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
|
|
||||||
prop.count++;
|
|
||||||
prop.mean += tmp;
|
|
||||||
prop.variance += (tmp-mean)*(tmp-mean);
|
|
||||||
const Scalar tmp2 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
|
|
||||||
prop.count++;
|
|
||||||
prop.mean += tmp2;
|
|
||||||
prop.variance += (tmp2-mean)*(tmp2-mean);
|
|
||||||
prop.covariance += (tmp-mean)*(tmp2-mean);
|
|
||||||
const Scalar tmp3 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
|
|
||||||
prop.count++;
|
|
||||||
prop.mean += tmp3;
|
|
||||||
prop.variance += (tmp3-mean)*(tmp3-mean);
|
|
||||||
prop.covariance += (tmp2-mean)*(tmp3-mean);
|
|
||||||
|
|
||||||
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to
|
|
||||||
// define an exclusive upper bound on the range of random
|
|
||||||
// numbers that draw() can generate. However, for the float
|
|
||||||
// specialization, some implementations might violate this upper
|
|
||||||
// bound, due to rounding error. Just in case, we have left an
|
|
||||||
// extra space at the end of each dimension of density_1d and
|
|
||||||
// density_3d.
|
|
||||||
//
|
|
||||||
// Please note that those extra entries might not get counted in
|
|
||||||
// the histograms. However, if Kokkos::rand is broken and only
|
|
||||||
// returns values of max(), the histograms will still catch this
|
|
||||||
// indirectly, since none of the other values will be filled in.
|
|
||||||
|
|
||||||
const Scalar theMax = Kokkos::rand<rnd_type, Scalar>::max ();
|
|
||||||
|
|
||||||
const uint64_t ind1_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp / theMax);
|
|
||||||
const uint64_t ind2_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp2 / theMax);
|
|
||||||
const uint64_t ind3_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp3 / theMax);
|
|
||||||
|
|
||||||
const uint64_t ind1_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp / theMax);
|
|
||||||
const uint64_t ind2_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp2 / theMax);
|
|
||||||
const uint64_t ind3_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp3 / theMax);
|
|
||||||
|
|
||||||
atomic_fetch_add (&density_1d(ind1_1d), 1);
|
|
||||||
atomic_fetch_add (&density_1d(ind2_1d), 1);
|
|
||||||
atomic_fetch_add (&density_1d(ind3_1d), 1);
|
|
||||||
atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1);
|
|
||||||
}
|
|
||||||
rand_pool.free_state(rand_gen);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
struct test_histogram1d_functor {
|
|
||||||
typedef RandomProperties value_type;
|
|
||||||
typedef typename DeviceType::execution_space execution_space;
|
|
||||||
typedef typename DeviceType::memory_space memory_space;
|
|
||||||
|
|
||||||
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
|
||||||
// an exclusive upper bound on the range of random numbers that
|
|
||||||
// draw() can generate. However, for the float specialization, some
|
|
||||||
// implementations might violate this upper bound, due to rounding
|
|
||||||
// error. Just in case, we leave an extra space at the end of each
|
|
||||||
// dimension, in the View type below.
|
|
||||||
typedef Kokkos::View<int[HIST_DIM1D+1], memory_space> type_1d;
|
|
||||||
type_1d density_1d;
|
|
||||||
double mean;
|
|
||||||
|
|
||||||
test_histogram1d_functor (type_1d d1d, int num_draws) :
|
|
||||||
density_1d (d1d),
|
|
||||||
mean (1.0*num_draws/HIST_DIM1D*3)
|
|
||||||
{
|
|
||||||
printf ("Mean: %e\n", mean);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION void
|
|
||||||
operator() (const typename memory_space::size_type i,
|
|
||||||
RandomProperties& prop) const
|
|
||||||
{
|
|
||||||
typedef typename memory_space::size_type size_type;
|
|
||||||
const double count = density_1d(i);
|
|
||||||
prop.mean += count;
|
|
||||||
prop.variance += 1.0 * (count - mean) * (count - mean);
|
|
||||||
//prop.covariance += 1.0*count*count;
|
|
||||||
prop.min = count < prop.min ? count : prop.min;
|
|
||||||
prop.max = count > prop.max ? count : prop.max;
|
|
||||||
if (i < static_cast<size_type> (HIST_DIM1D-1)) {
|
|
||||||
prop.covariance += (count - mean) * (density_1d(i+1) - mean);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
struct test_histogram3d_functor {
|
|
||||||
typedef RandomProperties value_type;
|
|
||||||
typedef typename DeviceType::execution_space execution_space;
|
|
||||||
typedef typename DeviceType::memory_space memory_space;
|
|
||||||
|
|
||||||
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
|
||||||
// an exclusive upper bound on the range of random numbers that
|
|
||||||
// draw() can generate. However, for the float specialization, some
|
|
||||||
// implementations might violate this upper bound, due to rounding
|
|
||||||
// error. Just in case, we leave an extra space at the end of each
|
|
||||||
// dimension, in the View type below.
|
|
||||||
typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1], memory_space> type_3d;
|
|
||||||
type_3d density_3d;
|
|
||||||
double mean;
|
|
||||||
|
|
||||||
test_histogram3d_functor (type_3d d3d, int num_draws) :
|
|
||||||
density_3d (d3d),
|
|
||||||
mean (1.0*num_draws/HIST_DIM1D)
|
|
||||||
{}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION void
|
|
||||||
operator() (const typename memory_space::size_type i,
|
|
||||||
RandomProperties& prop) const
|
|
||||||
{
|
|
||||||
typedef typename memory_space::size_type size_type;
|
|
||||||
const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D),
|
|
||||||
(i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
|
|
||||||
i % HIST_DIM3D);
|
|
||||||
prop.mean += count;
|
|
||||||
prop.variance += (count - mean) * (count - mean);
|
|
||||||
if (i < static_cast<size_type> (HIST_DIM1D-1)) {
|
|
||||||
const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D),
|
|
||||||
((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
|
|
||||||
(i+1)%HIST_DIM3D);
|
|
||||||
prop.covariance += (count - mean) * (count_next - mean);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//
|
|
||||||
// Templated test that uses the above functors.
|
|
||||||
//
|
|
||||||
template <class RandomGenerator,class Scalar>
|
|
||||||
struct test_random_scalar {
|
|
||||||
typedef typename RandomGenerator::generator_type rnd_type;
|
|
||||||
|
|
||||||
int pass_mean,pass_var,pass_covar;
|
|
||||||
int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar;
|
|
||||||
int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar;
|
|
||||||
|
|
||||||
test_random_scalar (typename test_random_functor<RandomGenerator,int>::type_1d& density_1d,
|
|
||||||
typename test_random_functor<RandomGenerator,int>::type_3d& density_3d,
|
|
||||||
RandomGenerator& pool,
|
|
||||||
unsigned int num_draws)
|
|
||||||
{
|
|
||||||
using std::cerr;
|
|
||||||
using std::endl;
|
|
||||||
using Kokkos::parallel_reduce;
|
|
||||||
|
|
||||||
{
|
|
||||||
cerr << " -- Testing randomness properties" << endl;
|
|
||||||
|
|
||||||
RandomProperties result;
|
|
||||||
typedef test_random_functor<RandomGenerator, Scalar> functor_type;
|
|
||||||
parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result);
|
|
||||||
|
|
||||||
//printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2);
|
|
||||||
double tolerance = 2.0*sqrt(1.0/num_draws);
|
|
||||||
double mean_expect = 0.5*Kokkos::rand<rnd_type,Scalar>::max();
|
|
||||||
double variance_expect = 1.0/3.0*mean_expect*mean_expect;
|
|
||||||
double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0;
|
|
||||||
double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0;
|
|
||||||
double covariance_eps = result.covariance/num_draws/2/variance_expect;
|
|
||||||
pass_mean = ((-tolerance < mean_eps) &&
|
|
||||||
( tolerance > mean_eps)) ? 1:0;
|
|
||||||
pass_var = ((-tolerance < variance_eps) &&
|
|
||||||
( tolerance > variance_eps)) ? 1:0;
|
|
||||||
pass_covar = ((-1.4*tolerance < covariance_eps) &&
|
|
||||||
( 1.4*tolerance > covariance_eps)) ? 1:0;
|
|
||||||
cerr << "Pass: " << pass_mean
|
|
||||||
<< " " << pass_var
|
|
||||||
<< " " << mean_eps
|
|
||||||
<< " " << variance_eps
|
|
||||||
<< " " << covariance_eps
|
|
||||||
<< " || " << tolerance << endl;
|
|
||||||
}
|
|
||||||
{
|
|
||||||
cerr << " -- Testing 1-D histogram" << endl;
|
|
||||||
|
|
||||||
RandomProperties result;
|
|
||||||
typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type;
|
|
||||||
parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result);
|
|
||||||
|
|
||||||
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
|
|
||||||
double mean_expect = 1.0*num_draws*3/HIST_DIM1D;
|
|
||||||
double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
|
|
||||||
double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D;
|
|
||||||
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
|
|
||||||
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
|
|
||||||
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
|
|
||||||
pass_hist1d_mean = ((-tolerance < mean_eps) &&
|
|
||||||
( tolerance > mean_eps)) ? 1:0;
|
|
||||||
pass_hist1d_var = ((-tolerance < variance_eps) &&
|
|
||||||
( tolerance > variance_eps)) ? 1:0;
|
|
||||||
pass_hist1d_covar = ((-tolerance < covariance_eps) &&
|
|
||||||
( tolerance > covariance_eps)) ? 1:0;
|
|
||||||
|
|
||||||
cerr << "Density 1D: " << mean_eps
|
|
||||||
<< " " << variance_eps
|
|
||||||
<< " " << (result.covariance/HIST_DIM1D/HIST_DIM1D)
|
|
||||||
<< " || " << tolerance
|
|
||||||
<< " " << result.min
|
|
||||||
<< " " << result.max
|
|
||||||
<< " || " << result.variance/HIST_DIM1D
|
|
||||||
<< " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D)
|
|
||||||
<< " || " << result.covariance/HIST_DIM1D
|
|
||||||
<< " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D
|
|
||||||
<< endl;
|
|
||||||
}
|
|
||||||
{
|
|
||||||
cerr << " -- Testing 3-D histogram" << endl;
|
|
||||||
|
|
||||||
RandomProperties result;
|
|
||||||
typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type;
|
|
||||||
parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result);
|
|
||||||
|
|
||||||
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
|
|
||||||
double mean_expect = 1.0*num_draws/HIST_DIM1D;
|
|
||||||
double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
|
|
||||||
double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D;
|
|
||||||
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
|
|
||||||
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
|
|
||||||
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
|
|
||||||
pass_hist3d_mean = ((-tolerance < mean_eps) &&
|
|
||||||
( tolerance > mean_eps)) ? 1:0;
|
|
||||||
pass_hist3d_var = ((-tolerance < variance_eps) &&
|
|
||||||
( tolerance > variance_eps)) ? 1:0;
|
|
||||||
pass_hist3d_covar = ((-tolerance < covariance_eps) &&
|
|
||||||
( tolerance > covariance_eps)) ? 1:0;
|
|
||||||
|
|
||||||
cerr << "Density 3D: " << mean_eps
|
|
||||||
<< " " << variance_eps
|
|
||||||
<< " " << result.covariance/HIST_DIM1D/HIST_DIM1D
|
|
||||||
<< " || " << tolerance
|
|
||||||
<< " " << result.min
|
|
||||||
<< " " << result.max << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class RandomGenerator>
|
|
||||||
void test_random(unsigned int num_draws)
|
|
||||||
{
|
|
||||||
using std::cerr;
|
|
||||||
using std::endl;
|
|
||||||
typename test_random_functor<RandomGenerator,int>::type_1d density_1d("D1d");
|
|
||||||
typename test_random_functor<RandomGenerator,int>::type_3d density_3d("D3d");
|
|
||||||
|
|
||||||
cerr << "Test Scalar=int" << endl;
|
|
||||||
RandomGenerator pool(31891);
|
|
||||||
test_random_scalar<RandomGenerator,int> test_int(density_1d,density_3d,pool,num_draws);
|
|
||||||
ASSERT_EQ( test_int.pass_mean,1);
|
|
||||||
ASSERT_EQ( test_int.pass_var,1);
|
|
||||||
ASSERT_EQ( test_int.pass_covar,1);
|
|
||||||
ASSERT_EQ( test_int.pass_hist1d_mean,1);
|
|
||||||
ASSERT_EQ( test_int.pass_hist1d_var,1);
|
|
||||||
ASSERT_EQ( test_int.pass_hist1d_covar,1);
|
|
||||||
ASSERT_EQ( test_int.pass_hist3d_mean,1);
|
|
||||||
ASSERT_EQ( test_int.pass_hist3d_var,1);
|
|
||||||
ASSERT_EQ( test_int.pass_hist3d_covar,1);
|
|
||||||
deep_copy(density_1d,0);
|
|
||||||
deep_copy(density_3d,0);
|
|
||||||
|
|
||||||
cerr << "Test Scalar=unsigned int" << endl;
|
|
||||||
test_random_scalar<RandomGenerator,unsigned int> test_uint(density_1d,density_3d,pool,num_draws);
|
|
||||||
ASSERT_EQ( test_uint.pass_mean,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_var,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_covar,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_hist1d_mean,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_hist1d_var,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_hist1d_covar,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_hist3d_mean,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_hist3d_var,1);
|
|
||||||
ASSERT_EQ( test_uint.pass_hist3d_covar,1);
|
|
||||||
deep_copy(density_1d,0);
|
|
||||||
deep_copy(density_3d,0);
|
|
||||||
|
|
||||||
cerr << "Test Scalar=int64_t" << endl;
|
|
||||||
test_random_scalar<RandomGenerator,int64_t> test_int64(density_1d,density_3d,pool,num_draws);
|
|
||||||
ASSERT_EQ( test_int64.pass_mean,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_var,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_covar,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_hist1d_mean,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_hist1d_var,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_hist1d_covar,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_hist3d_mean,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_hist3d_var,1);
|
|
||||||
ASSERT_EQ( test_int64.pass_hist3d_covar,1);
|
|
||||||
deep_copy(density_1d,0);
|
|
||||||
deep_copy(density_3d,0);
|
|
||||||
|
|
||||||
cerr << "Test Scalar=uint64_t" << endl;
|
|
||||||
test_random_scalar<RandomGenerator,uint64_t> test_uint64(density_1d,density_3d,pool,num_draws);
|
|
||||||
ASSERT_EQ( test_uint64.pass_mean,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_var,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_covar,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_hist1d_mean,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_hist1d_var,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_hist1d_covar,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_hist3d_mean,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_hist3d_var,1);
|
|
||||||
ASSERT_EQ( test_uint64.pass_hist3d_covar,1);
|
|
||||||
deep_copy(density_1d,0);
|
|
||||||
deep_copy(density_3d,0);
|
|
||||||
|
|
||||||
cerr << "Test Scalar=float" << endl;
|
|
||||||
test_random_scalar<RandomGenerator,float> test_float(density_1d,density_3d,pool,num_draws);
|
|
||||||
ASSERT_EQ( test_float.pass_mean,1);
|
|
||||||
ASSERT_EQ( test_float.pass_var,1);
|
|
||||||
ASSERT_EQ( test_float.pass_covar,1);
|
|
||||||
ASSERT_EQ( test_float.pass_hist1d_mean,1);
|
|
||||||
ASSERT_EQ( test_float.pass_hist1d_var,1);
|
|
||||||
ASSERT_EQ( test_float.pass_hist1d_covar,1);
|
|
||||||
ASSERT_EQ( test_float.pass_hist3d_mean,1);
|
|
||||||
ASSERT_EQ( test_float.pass_hist3d_var,1);
|
|
||||||
ASSERT_EQ( test_float.pass_hist3d_covar,1);
|
|
||||||
deep_copy(density_1d,0);
|
|
||||||
deep_copy(density_3d,0);
|
|
||||||
|
|
||||||
cerr << "Test Scalar=double" << endl;
|
|
||||||
test_random_scalar<RandomGenerator,double> test_double(density_1d,density_3d,pool,num_draws);
|
|
||||||
ASSERT_EQ( test_double.pass_mean,1);
|
|
||||||
ASSERT_EQ( test_double.pass_var,1);
|
|
||||||
ASSERT_EQ( test_double.pass_covar,1);
|
|
||||||
ASSERT_EQ( test_double.pass_hist1d_mean,1);
|
|
||||||
ASSERT_EQ( test_double.pass_hist1d_var,1);
|
|
||||||
ASSERT_EQ( test_double.pass_hist1d_covar,1);
|
|
||||||
ASSERT_EQ( test_double.pass_hist3d_mean,1);
|
|
||||||
ASSERT_EQ( test_double.pass_hist3d_var,1);
|
|
||||||
ASSERT_EQ( test_double.pass_hist3d_covar,1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP
|
|
||||||
@ -1,99 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#include <TestRandom.hpp>
|
|
||||||
#include <TestSort.hpp>
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
#ifdef KOKKOS_HAVE_SERIAL
|
|
||||||
class serial : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision (5) << std::scientific;
|
|
||||||
Kokkos::Serial::initialize ();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase ()
|
|
||||||
{
|
|
||||||
Kokkos::Serial::finalize ();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \
|
|
||||||
TEST_F( serial, Random_XorShift64 ) { \
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Serial> >(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \
|
|
||||||
TEST_F( serial, Random_XorShift1024 ) { \
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Serial> >(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_SORT_UNSIGNED( size ) \
|
|
||||||
TEST_F( serial, SortUnsigned ) { \
|
|
||||||
Impl::test_sort< Kokkos::Serial, unsigned >(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
SERIAL_RANDOM_XORSHIFT64( 10240000 )
|
|
||||||
SERIAL_RANDOM_XORSHIFT1024( 10130144 )
|
|
||||||
SERIAL_SORT_UNSIGNED(171)
|
|
||||||
|
|
||||||
#undef SERIAL_RANDOM_XORSHIFT64
|
|
||||||
#undef SERIAL_RANDOM_XORSHIFT1024
|
|
||||||
#undef SERIAL_SORT_UNSIGNED
|
|
||||||
|
|
||||||
#endif // KOKKOS_HAVE_SERIAL
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,206 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef TESTSORT_HPP_
|
|
||||||
#define TESTSORT_HPP_
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include<Kokkos_Core.hpp>
|
|
||||||
#include<Kokkos_Random.hpp>
|
|
||||||
#include<Kokkos_Sort.hpp>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl{
|
|
||||||
|
|
||||||
template<class ExecutionSpace, class Scalar>
|
|
||||||
struct is_sorted_struct {
|
|
||||||
typedef unsigned int value_type;
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
|
|
||||||
Kokkos::View<Scalar*,ExecutionSpace> keys;
|
|
||||||
|
|
||||||
is_sorted_struct(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {}
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (int i, unsigned int& count) const {
|
|
||||||
if(keys(i)>keys(i+1)) count++;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ExecutionSpace, class Scalar>
|
|
||||||
struct sum {
|
|
||||||
typedef double value_type;
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
|
|
||||||
Kokkos::View<Scalar*,ExecutionSpace> keys;
|
|
||||||
|
|
||||||
sum(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {}
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (int i, double& count) const {
|
|
||||||
count+=keys(i);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ExecutionSpace, class Scalar>
|
|
||||||
struct bin3d_is_sorted_struct {
|
|
||||||
typedef unsigned int value_type;
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
|
|
||||||
Kokkos::View<Scalar*[3],ExecutionSpace> keys;
|
|
||||||
|
|
||||||
int max_bins;
|
|
||||||
Scalar min;
|
|
||||||
Scalar max;
|
|
||||||
|
|
||||||
bin3d_is_sorted_struct(Kokkos::View<Scalar*[3],ExecutionSpace> keys_,int max_bins_,Scalar min_,Scalar max_):
|
|
||||||
keys(keys_),max_bins(max_bins_),min(min_),max(max_) {
|
|
||||||
}
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (int i, unsigned int& count) const {
|
|
||||||
int ix1 = int ((keys(i,0)-min)/max * max_bins);
|
|
||||||
int iy1 = int ((keys(i,1)-min)/max * max_bins);
|
|
||||||
int iz1 = int ((keys(i,2)-min)/max * max_bins);
|
|
||||||
int ix2 = int ((keys(i+1,0)-min)/max * max_bins);
|
|
||||||
int iy2 = int ((keys(i+1,1)-min)/max * max_bins);
|
|
||||||
int iz2 = int ((keys(i+1,2)-min)/max * max_bins);
|
|
||||||
|
|
||||||
if (ix1>ix2) count++;
|
|
||||||
else if(ix1==ix2) {
|
|
||||||
if (iy1>iy2) count++;
|
|
||||||
else if ((iy1==iy2) && (iz1>iz2)) count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ExecutionSpace, class Scalar>
|
|
||||||
struct sum3D {
|
|
||||||
typedef double value_type;
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
|
|
||||||
Kokkos::View<Scalar*[3],ExecutionSpace> keys;
|
|
||||||
|
|
||||||
sum3D(Kokkos::View<Scalar*[3],ExecutionSpace> keys_):keys(keys_) {}
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (int i, double& count) const {
|
|
||||||
count+=keys(i,0);
|
|
||||||
count+=keys(i,1);
|
|
||||||
count+=keys(i,2);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ExecutionSpace, typename KeyType>
|
|
||||||
void test_1D_sort(unsigned int n,bool force_kokkos) {
|
|
||||||
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
|
|
||||||
KeyViewType keys("Keys",n);
|
|
||||||
|
|
||||||
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
|
||||||
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
|
|
||||||
|
|
||||||
double sum_before = 0.0;
|
|
||||||
double sum_after = 0.0;
|
|
||||||
unsigned int sort_fails = 0;
|
|
||||||
|
|
||||||
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_before);
|
|
||||||
|
|
||||||
Kokkos::sort(keys,force_kokkos);
|
|
||||||
|
|
||||||
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_after);
|
|
||||||
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys),sort_fails);
|
|
||||||
|
|
||||||
double ratio = sum_before/sum_after;
|
|
||||||
double epsilon = 1e-10;
|
|
||||||
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
|
|
||||||
|
|
||||||
ASSERT_EQ(sort_fails,0);
|
|
||||||
ASSERT_EQ(equal_sum,1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class ExecutionSpace, typename KeyType>
|
|
||||||
void test_3D_sort(unsigned int n) {
|
|
||||||
typedef Kokkos::View<KeyType*[3],ExecutionSpace > KeyViewType;
|
|
||||||
|
|
||||||
KeyViewType keys("Keys",n*n*n);
|
|
||||||
|
|
||||||
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
|
||||||
Kokkos::fill_random(keys,g,100.0);
|
|
||||||
|
|
||||||
double sum_before = 0.0;
|
|
||||||
double sum_after = 0.0;
|
|
||||||
unsigned int sort_fails = 0;
|
|
||||||
|
|
||||||
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
|
|
||||||
|
|
||||||
int bin_1d = 1;
|
|
||||||
while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2;
|
|
||||||
int bin_max[3] = {bin_1d,bin_1d,bin_1d};
|
|
||||||
typename KeyViewType::value_type min[3] = {0,0,0};
|
|
||||||
typename KeyViewType::value_type max[3] = {100,100,100};
|
|
||||||
|
|
||||||
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
|
|
||||||
BinOp bin_op(bin_max,min,max);
|
|
||||||
Kokkos::BinSort< KeyViewType , BinOp >
|
|
||||||
Sorter(keys,bin_op,false);
|
|
||||||
Sorter.create_permute_vector();
|
|
||||||
Sorter.template sort< KeyViewType >(keys);
|
|
||||||
|
|
||||||
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
|
|
||||||
Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
|
|
||||||
|
|
||||||
double ratio = sum_before/sum_after;
|
|
||||||
double epsilon = 1e-10;
|
|
||||||
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
|
|
||||||
|
|
||||||
printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails);
|
|
||||||
ASSERT_EQ(sort_fails,0);
|
|
||||||
ASSERT_EQ(equal_sum,1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class ExecutionSpace, typename KeyType>
|
|
||||||
void test_sort(unsigned int N)
|
|
||||||
{
|
|
||||||
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, true);
|
|
||||||
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, false);
|
|
||||||
test_3D_sort<ExecutionSpace,KeyType>(N);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* TESTSORT_HPP_ */
|
|
||||||
@ -1,113 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#include <TestRandom.hpp>
|
|
||||||
#include <TestSort.hpp>
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
#ifdef KOKKOS_HAVE_PTHREAD
|
|
||||||
class threads : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
|
|
||||||
unsigned num_threads = 4;
|
|
||||||
|
|
||||||
if (Kokkos::hwloc::available()) {
|
|
||||||
num_threads = Kokkos::hwloc::get_available_numa_count()
|
|
||||||
* Kokkos::hwloc::get_available_cores_per_numa()
|
|
||||||
// * Kokkos::hwloc::get_available_threads_per_core()
|
|
||||||
;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Threads: " << num_threads << std::endl;
|
|
||||||
|
|
||||||
Kokkos::Threads::initialize( num_threads );
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::Threads::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
#define THREADS_RANDOM_XORSHIFT64( num_draws ) \
|
|
||||||
TEST_F( threads, Random_XorShift64 ) { \
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Threads> >(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \
|
|
||||||
TEST_F( threads, Random_XorShift1024 ) { \
|
|
||||||
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Threads> >(num_draws); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_SORT_UNSIGNED( size ) \
|
|
||||||
TEST_F( threads, SortUnsigned ) { \
|
|
||||||
Impl::test_sort< Kokkos::Threads, double >(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
THREADS_RANDOM_XORSHIFT64( 10240000 )
|
|
||||||
THREADS_RANDOM_XORSHIFT1024( 10130144 )
|
|
||||||
THREADS_SORT_UNSIGNED(171)
|
|
||||||
|
|
||||||
#undef THREADS_RANDOM_XORSHIFT64
|
|
||||||
#undef THREADS_RANDOM_XORSHIFT1024
|
|
||||||
#undef THREADS_SORT_UNSIGNED
|
|
||||||
|
|
||||||
#endif
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,50 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
::testing::InitGoogleTest(&argc,argv);
|
|
||||||
return RUN_ALL_TESTS();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
# Declares the Kokkos subpackages.  Every row of the table lists, in order:
# subpackage name, directory, classification, and whether the subpackage is
# REQUIRED or OPTIONAL.
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
  SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS
    # SubPackageName   Directory     Class   Req/Opt
    #
    # New Kokkos subpackages:
    Core               core          PS      REQUIRED
    Containers         containers    PS      OPTIONAL
    Algorithms         algorithms    PS      OPTIONAL
    Example            example       EX      OPTIONAL
  )
|
|
||||||
@ -1,75 +0,0 @@
|
|||||||
# @HEADER
|
|
||||||
# ************************************************************************
|
|
||||||
#
|
|
||||||
# Trilinos: An Object-Oriented Solver Framework
|
|
||||||
# Copyright (2001) Sandia Corporation
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Copyright (2001) Sandia Corporation. Under the terms of Contract
|
|
||||||
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
|
|
||||||
# work by or on behalf of the U.S. Government. Export of this program
|
|
||||||
# may require a license from the United States Government.
|
|
||||||
#
|
|
||||||
# 1. Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# 3. Neither the name of the Corporation nor the names of the
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
# NOTICE: The United States Government is granted for itself and others
|
|
||||||
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
|
|
||||||
# license in this data to reproduce, prepare derivative works, and
|
|
||||||
# perform publicly and display publicly. Beginning five (5) years from
|
|
||||||
# July 25, 2001, the United States Government is granted for itself and
|
|
||||||
# others acting on its behalf a paid-up, nonexclusive, irrevocable
|
|
||||||
# worldwide license in this data to reproduce, prepare derivative works,
|
|
||||||
# distribute copies to the public, perform publicly and display
|
|
||||||
# publicly, and to permit others to do so.
|
|
||||||
#
|
|
||||||
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
|
|
||||||
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
|
|
||||||
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
|
|
||||||
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
|
|
||||||
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
|
|
||||||
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
|
|
||||||
#
|
|
||||||
# ************************************************************************
|
|
||||||
# @HEADER
|
|
||||||
|
|
||||||
# Check for CUDA support
|
|
||||||
|
|
||||||
# CUSPARSE needs the CUDA TPL to be enabled and CUDA 4.1 or newer; configuring
# stops with a fatal error otherwise.
IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1")
  MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)")
ELSE()
  IF(CMAKE_VERSION VERSION_LESS "2.8.8")
    # FindCUDA before CMake 2.8.8 does not find the cusparse library;
    # therefore, we must locate it ourselves.
    find_library(CUDA_cusparse_LIBRARY
      cusparse
      HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib
      )
    # find_library() stores "<VAR>-NOTFOUND" in the variable when the search
    # fails.
    IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND")
      MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cusparse library.")
    ENDIF()
  ENDIF(CMAKE_VERSION VERSION_LESS "2.8.8")
  # Export the TPL variables: no extra library directories, the CUDA include
  # directories, and the cusparse library itself.
  GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
  GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
  GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
ENDIF()
|
|
||||||
|
|
||||||
@ -1,71 +0,0 @@
|
|||||||
# @HEADER
|
|
||||||
# ************************************************************************
|
|
||||||
#
|
|
||||||
# Trilinos: An Object-Oriented Solver Framework
|
|
||||||
# Copyright (2001) Sandia Corporation
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Copyright (2001) Sandia Corporation. Under the terms of Contract
|
|
||||||
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
|
|
||||||
# work by or on behalf of the U.S. Government. Export of this program
|
|
||||||
# may require a license from the United States Government.
|
|
||||||
#
|
|
||||||
# 1. Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# 3. Neither the name of the Corporation nor the names of the
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
# NOTICE: The United States Government is granted for itself and others
|
|
||||||
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
|
|
||||||
# license in this data to reproduce, prepare derivative works, and
|
|
||||||
# perform publicly and display publicly. Beginning five (5) years from
|
|
||||||
# July 25, 2001, the United States Government is granted for itself and
|
|
||||||
# others acting on its behalf a paid-up, nonexclusive, irrevocable
|
|
||||||
# worldwide license in this data to reproduce, prepare derivative works,
|
|
||||||
# distribute copies to the public, perform publicly and display
|
|
||||||
# publicly, and to permit others to do so.
|
|
||||||
#
|
|
||||||
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
|
|
||||||
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
|
|
||||||
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
|
|
||||||
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
|
|
||||||
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
|
|
||||||
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
|
|
||||||
#
|
|
||||||
# ************************************************************************
|
|
||||||
# @HEADER
|
|
||||||
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality detection and control library.
|
|
||||||
#
|
|
||||||
# Acquisition information:
|
|
||||||
# Date checked: November 2011
|
|
||||||
# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
|
|
||||||
# Source: http://www.open-mpi.org/projects/hwloc/
|
|
||||||
# Version: 1.3
|
|
||||||
#
|
|
||||||
|
|
||||||
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC
|
|
||||||
REQUIRED_HEADERS hwloc.h
|
|
||||||
REQUIRED_LIBS_NAMES "hwloc"
|
|
||||||
)
|
|
||||||
|
|
||||||
@ -1,82 +0,0 @@
|
|||||||
# @HEADER
|
|
||||||
# ************************************************************************
|
|
||||||
#
|
|
||||||
# Trilinos: An Object-Oriented Solver Framework
|
|
||||||
# Copyright (2001) Sandia Corporation
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Copyright (2001) Sandia Corporation. Under the terms of Contract
|
|
||||||
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
|
|
||||||
# work by or on behalf of the U.S. Government. Export of this program
|
|
||||||
# may require a license from the United States Government.
|
|
||||||
#
|
|
||||||
# 1. Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# 3. Neither the name of the Corporation nor the names of the
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
# NOTICE: The United States Government is granted for itself and others
|
|
||||||
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
|
|
||||||
# license in this data to reproduce, prepare derivative works, and
|
|
||||||
# perform publicly and display publicly. Beginning five (5) years from
|
|
||||||
# July 25, 2001, the United States Government is granted for itself and
|
|
||||||
# others acting on its behalf a paid-up, nonexclusive, irrevocable
|
|
||||||
# worldwide license in this data to reproduce, prepare derivative works,
|
|
||||||
# distribute copies to the public, perform publicly and display
|
|
||||||
# publicly, and to permit others to do so.
|
|
||||||
#
|
|
||||||
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
|
|
||||||
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
|
|
||||||
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
|
|
||||||
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
|
|
||||||
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
|
|
||||||
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
|
|
||||||
#
|
|
||||||
# ************************************************************************
|
|
||||||
# @HEADER
|
|
||||||
|
|
||||||
|
|
||||||
SET(USE_THREADS FALSE)
|
|
||||||
|
|
||||||
IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES)
|
|
||||||
# Use CMake's Thread finder since it is a bit smarter in determining
|
|
||||||
# whether pthreads is already built into the compiler and doesn't need
|
|
||||||
# a library to link.
|
|
||||||
FIND_PACKAGE(Threads)
|
|
||||||
#If Threads found a copy of pthreads make sure it is one of the cases the tribits
|
|
||||||
#tpl system cannot handle.
|
|
||||||
IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
|
|
||||||
IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread")
|
|
||||||
SET(USE_THREADS TRUE)
|
|
||||||
ENDIF()
|
|
||||||
ENDIF()
|
|
||||||
ENDIF()
|
|
||||||
|
|
||||||
IF(USE_THREADS)
|
|
||||||
SET(TPL_Pthread_INCLUDE_DIRS "")
|
|
||||||
SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
|
|
||||||
SET(TPL_Pthread_LIBRARY_DIRS "")
|
|
||||||
ELSE()
|
|
||||||
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread
|
|
||||||
REQUIRED_HEADERS pthread.h
|
|
||||||
REQUIRED_LIBS_NAMES pthread
|
|
||||||
)
|
|
||||||
ENDIF()
|
|
||||||
@ -1,70 +0,0 @@
|
|||||||
# @HEADER
|
|
||||||
# ************************************************************************
|
|
||||||
#
|
|
||||||
# Trilinos: An Object-Oriented Solver Framework
|
|
||||||
# Copyright (2001) Sandia Corporation
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Copyright (2001) Sandia Corporation. Under the terms of Contract
|
|
||||||
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
|
|
||||||
# work by or on behalf of the U.S. Government. Export of this program
|
|
||||||
# may require a license from the United States Government.
|
|
||||||
#
|
|
||||||
# 1. Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# 3. Neither the name of the Corporation nor the names of the
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
# NOTICE: The United States Government is granted for itself and others
|
|
||||||
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
|
|
||||||
# license in this data to reproduce, prepare derivative works, and
|
|
||||||
# perform publicly and display publicly. Beginning five (5) years from
|
|
||||||
# July 25, 2001, the United States Government is granted for itself and
|
|
||||||
# others acting on its behalf a paid-up, nonexclusive, irrevocable
|
|
||||||
# worldwide license in this data to reproduce, prepare derivative works,
|
|
||||||
# distribute copies to the public, perform publicly and display
|
|
||||||
# publicly, and to permit others to do so.
|
|
||||||
#
|
|
||||||
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
|
|
||||||
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
|
|
||||||
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
|
|
||||||
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
|
|
||||||
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
|
|
||||||
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
|
|
||||||
#
|
|
||||||
# ************************************************************************
|
|
||||||
# @HEADER
|
|
||||||
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Qthreads multithreading library.
|
|
||||||
#
|
|
||||||
# Acquisition information:
|
|
||||||
# Date checked: July 2014
|
|
||||||
# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
|
|
||||||
# Source: https://code.google.com/p/qthreads
|
|
||||||
#
|
|
||||||
|
|
||||||
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD
|
|
||||||
REQUIRED_HEADERS qthread.h
|
|
||||||
REQUIRED_LIBS_NAMES "qthread"
|
|
||||||
)
|
|
||||||
|
|
||||||
@ -1,190 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
# Additional command-line arguments given to this script will be
|
|
||||||
# passed directly to CMake.
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# Force CMake to re-evaluate build options.
|
|
||||||
#
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Incrementally construct cmake configure options:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Location of Trilinos source tree:
|
|
||||||
|
|
||||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
|
||||||
|
|
||||||
# Location for installation:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# General build options.
|
|
||||||
# Use a variable so options can be propagated to CUDA compiler.
|
|
||||||
|
|
||||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
|
||||||
CMAKE_BUILD_TYPE=RELEASE
|
|
||||||
# CMAKE_BUILD_TYPE=DEBUG
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Build for CUDA architecture:
|
|
||||||
|
|
||||||
CUDA_ARCH=""
|
|
||||||
# CUDA_ARCH="20"
|
|
||||||
# CUDA_ARCH="30"
|
|
||||||
# CUDA_ARCH="35"
|
|
||||||
|
|
||||||
# Build with Intel compiler
|
|
||||||
|
|
||||||
INTEL=ON
|
|
||||||
|
|
||||||
# Build for MIC architecture:
|
|
||||||
|
|
||||||
# INTEL_XEON_PHI=ON
|
|
||||||
|
|
||||||
# Build with HWLOC at location:
|
|
||||||
|
|
||||||
HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2"
|
|
||||||
|
|
||||||
# Location for MPI to use in examples:
|
|
||||||
|
|
||||||
MPI_BASE_DIR=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# MPI configuration, only used for examples:
|
|
||||||
#
|
|
||||||
# Must have the MPI_BASE_DIR so that the
|
|
||||||
# include path can be passed to the Cuda compiler
|
|
||||||
|
|
||||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread configuration:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP configuration:
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure packages for kokkos-only:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${CUDA_ARCH}" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
|
||||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
|
||||||
|
|
||||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
|
||||||
|
|
||||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
|
||||||
fi
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Cross-compile for Intel Xeon Phi:
|
|
||||||
|
|
||||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
|
||||||
|
|
||||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
|
|
||||||
# Tell cmake the answers to compile-and-execute tests
|
|
||||||
# to prevent cmake from executing a cross-compiled program.
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Show the command being run, then run it. "$@" forwards any extra
# command-line arguments given to this script on to CMake, as the
# header comment of this script promises; with no extra arguments
# both lines behave exactly as before.
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" "$@"
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} "$@"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,186 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
# Additional command-line arguments given to this script will be
|
|
||||||
# passed directly to CMake.
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# Force CMake to re-evaluate build options.
|
|
||||||
#
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Incrementally construct cmake configure options:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Location of Trilinos source tree:
|
|
||||||
|
|
||||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
|
||||||
|
|
||||||
# Location for installation:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/`date +%F`"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# General build options.
|
|
||||||
# Use a variable so options can be propagated to CUDA compiler.
|
|
||||||
|
|
||||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
|
||||||
CMAKE_BUILD_TYPE=RELEASE
|
|
||||||
# CMAKE_BUILD_TYPE=DEBUG
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Build for CUDA architecture:
|
|
||||||
|
|
||||||
CUDA_ARCH=""
|
|
||||||
# CUDA_ARCH="20"
|
|
||||||
# CUDA_ARCH="30"
|
|
||||||
# CUDA_ARCH="35"
|
|
||||||
|
|
||||||
# Build for MIC architecture:
|
|
||||||
|
|
||||||
INTEL_XEON_PHI=ON
|
|
||||||
|
|
||||||
# Build with HWLOC at location:
|
|
||||||
|
|
||||||
HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2"
|
|
||||||
|
|
||||||
# Location for MPI to use in examples:
|
|
||||||
|
|
||||||
MPI_BASE_DIR=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# MPI configuration, only used for examples:
|
|
||||||
#
|
|
||||||
# Must have the MPI_BASE_DIR so that the
|
|
||||||
# include path can be passed to the Cuda compiler
|
|
||||||
|
|
||||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread configuration:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP configuration:
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure packages for kokkos-only:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${CUDA_ARCH}" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
|
||||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
|
||||||
|
|
||||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
|
||||||
|
|
||||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
|
||||||
fi
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Cross-compile for Intel Xeon Phi:
|
|
||||||
|
|
||||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
|
||||||
|
|
||||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
|
|
||||||
# Tell cmake the answers to compile-and-execute tests
|
|
||||||
# to prevent cmake from executing a cross-compiled program.
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Show the command being run, then run it. "$@" forwards any extra
# command-line arguments given to this script on to CMake, as the
# header comment of this script promises; with no extra arguments
# both lines behave exactly as before.
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" "$@"
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} "$@"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,293 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# General build options.
|
|
||||||
# Use a variable so options can be propagated to CUDA compiler.
|
|
||||||
|
|
||||||
CMAKE_BUILD_TYPE=RELEASE
|
|
||||||
# CMAKE_BUILD_TYPE=DEBUG
|
|
||||||
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
USE_CUDA_ARCH=
|
|
||||||
USE_THREAD=
|
|
||||||
USE_OPENMP=
|
|
||||||
USE_INTEL=
|
|
||||||
USE_XEON_PHI=
|
|
||||||
HWLOC_BASE_DIR=
|
|
||||||
MPI_BASE_DIR=
|
|
||||||
BLAS_LIB_DIR=
|
|
||||||
LAPACK_LIB_DIR=
|
|
||||||
|
|
||||||
if [ 1 ] ; then
|
|
||||||
# Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu
|
|
||||||
USE_CUDA_ARCH="35"
|
|
||||||
USE_OPENMP=ON
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
|
|
||||||
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
|
|
||||||
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
|
|
||||||
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
|
|
||||||
|
|
||||||
elif [ ] ; then
|
|
||||||
# Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu
|
|
||||||
USE_CUDA_ARCH="35"
|
|
||||||
USE_THREAD=ON
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
|
|
||||||
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
|
|
||||||
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
|
|
||||||
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
|
|
||||||
|
|
||||||
elif [ ] ; then
|
|
||||||
# Platform 'kokkos-dev' with Xeon Phi and hwloc
|
|
||||||
USE_OPENMP=ON
|
|
||||||
USE_INTEL=ON
|
|
||||||
USE_XEON_PHI=ON
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106"
|
|
||||||
|
|
||||||
elif [ ] ; then
|
|
||||||
# Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu
|
|
||||||
USE_CUDA_ARCH="20"
|
|
||||||
USE_OPENMP=ON
|
|
||||||
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
|
|
||||||
MPI_BASE_DIR="/home/sems/common/openmpi/current"
|
|
||||||
|
|
||||||
elif [ ] ; then
|
|
||||||
# Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu
|
|
||||||
USE_CUDA_ARCH="20"
|
|
||||||
USE_THREAD=ON
|
|
||||||
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
|
|
||||||
MPI_BASE_DIR="/home/sems/common/openmpi/current"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Incrementally construct cmake configure command line options:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CXX_FLAGS=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ 1 ] ; then
|
|
||||||
|
|
||||||
# Configure for Tpetra/Kokkos:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode"
|
|
||||||
|
|
||||||
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE"
|
|
||||||
|
|
||||||
if [ -n "${USE_CUDA_ARCH}" ] ; then
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ 1 ] ; then
|
|
||||||
|
|
||||||
# Configure for Stokhos:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ 1 ] ; then
|
|
||||||
|
|
||||||
# Configure for TrilinosCouplings:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON"
|
|
||||||
|
|
||||||
# String equality uses the POSIX '=' operator: this script runs under /bin/sh, where '==' is not portable.
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Location for installation:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# MPI configuration only used for examples:
|
|
||||||
#
|
|
||||||
# Must have the MPI_BASE_DIR so that the
|
|
||||||
# include path can be passed to the Cuda compiler
|
|
||||||
|
|
||||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Kokkos use pthread configuration:
|
|
||||||
|
|
||||||
if [ "${USE_THREAD}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Kokkos use OpenMP configuration:
|
|
||||||
|
|
||||||
if [ "${USE_OPENMP}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality configuration:
|
|
||||||
|
|
||||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${USE_CUDA_ARCH}" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
|
||||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
|
||||||
|
|
||||||
CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}"
|
|
||||||
|
|
||||||
if [ "${USE_OPENMP}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
|
||||||
fi
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ "${USE_INTEL}" = "ON" -o "${USE_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Cross-compile for Intel Xeon Phi:
|
|
||||||
|
|
||||||
if [ "${USE_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
|
||||||
|
|
||||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
|
|
||||||
# Tell cmake the answers to compile-and-execute tests
|
|
||||||
# to prevent cmake from executing a cross-compiled program.
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ -n "${CMAKE_CXX_FLAGS}" ] ; then
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}"
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,88 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
# Additional command-line arguments given to this script will be
|
|
||||||
# passed directly to CMake.
|
|
||||||
#
|
|
||||||
|
|
||||||
# to build:
|
|
||||||
# build on bgq-b[1-12]
|
|
||||||
# module load sierra-devel
|
|
||||||
# run this configure file
|
|
||||||
# make
|
|
||||||
|
|
||||||
# to run:
|
|
||||||
# ssh bgq-login
|
|
||||||
# cd /scratch/username/...
|
|
||||||
# export OMP_PROC_BIND and XLSMPOPTS environment variables
|
|
||||||
# run with srun
|
|
||||||
|
|
||||||
# Note: hwloc does not work to get or set cpubindings on bgq.
|
|
||||||
# Use the openmp backend and the openmp environment variables.
|
|
||||||
#
|
|
||||||
# Only the mpi wrappers seem to be setup for cross-compile,
|
|
||||||
# so it is important that this configure enables MPI and uses mpigcc wrappers.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# Force CMake to re-evaluate build options.
|
|
||||||
#
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Incrementally construct cmake configure options:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Location of Trilinos source tree:
|
|
||||||
|
|
||||||
CMAKE_PROJECT_DIR="../Trilinos"
|
|
||||||
|
|
||||||
# Location for installation:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/`date +%F`"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# General build options.
|
|
||||||
# Use a variable so options can be propagated to CUDA compiler.
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2"
|
|
||||||
|
|
||||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
|
||||||
CMAKE_BUILD_TYPE=RELEASE
|
|
||||||
# CMAKE_BUILD_TYPE=DEBUG
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure packages for kokkos-only:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
|
||||||
|
|
||||||
# Forward any extra command-line arguments to CMake, as this script's header comment states.
# They come after the generated options so a user-supplied -D can override a default set above.
cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,216 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
# Additional command-line arguments given to this script will be
|
|
||||||
# passed directly to CMake.
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# Force CMake to re-evaluate build options.
|
|
||||||
#
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Incrementally construct cmake configure options:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Location of Trilinos source tree:
|
|
||||||
|
|
||||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
|
||||||
|
|
||||||
# Location for installation:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/`date +%F`"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# General build options.
|
|
||||||
# Use a variable so options can be propagated to CUDA compiler.
|
|
||||||
|
|
||||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
|
||||||
CMAKE_BUILD_TYPE=RELEASE
|
|
||||||
#CMAKE_BUILD_TYPE=DEBUG
|
|
||||||
#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Build for CUDA architecture:
|
|
||||||
|
|
||||||
#CUDA_ARCH=""
|
|
||||||
#CUDA_ARCH="20"
|
|
||||||
#CUDA_ARCH="30"
|
|
||||||
CUDA_ARCH="35"
|
|
||||||
|
|
||||||
# Build with OpenMP
|
|
||||||
|
|
||||||
OPENMP=ON
|
|
||||||
PTHREADS=ON
|
|
||||||
|
|
||||||
# Build host code with Intel compiler:
|
|
||||||
|
|
||||||
INTEL=OFF
|
|
||||||
|
|
||||||
# Build for MIC architecture:
|
|
||||||
|
|
||||||
INTEL_XEON_PHI=OFF
|
|
||||||
|
|
||||||
# Build with HWLOC at location:
|
|
||||||
|
|
||||||
#HWLOC_BASE_DIR=""
|
|
||||||
#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
|
||||||
|
|
||||||
# Location for MPI to use in examples:
|
|
||||||
|
|
||||||
#MPI_BASE_DIR=""
|
|
||||||
#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
|
|
||||||
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3"
|
|
||||||
#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# MPI configuration only used for examples:
|
|
||||||
#
|
|
||||||
# Must have the MPI_BASE_DIR so that the
|
|
||||||
# include path can be passed to the Cuda compiler
|
|
||||||
|
|
||||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread configuration:
|
|
||||||
|
|
||||||
if [ "${PTHREADS}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP configuration:
|
|
||||||
|
|
||||||
if [ "${OPENMP}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure packages for kokkos-only:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${CUDA_ARCH}" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
|
||||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
|
||||||
|
|
||||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
|
||||||
|
|
||||||
if [ "${OPENMP}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
|
||||||
fi
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Cross-compile for Intel Xeon Phi:
|
|
||||||
|
|
||||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
|
||||||
|
|
||||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
|
|
||||||
# Tell cmake the answers to compile-and-execute tests
|
|
||||||
# to prevent cmake from executing a cross-compiled program.
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
|
||||||
|
|
||||||
# Forward any extra command-line arguments to CMake, as this script's header comment states.
# They come after the generated options so a user-supplied -D can override a default set above.
cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,204 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
# Additional command-line arguments given to this script will be
|
|
||||||
# passed directly to CMake.
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# Force CMake to re-evaluate build options.
|
|
||||||
#
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Incrementally construct cmake configure options:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Location of Trilinos source tree:
|
|
||||||
|
|
||||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
|
||||||
|
|
||||||
# Location for installation:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/`date +%F`"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# General build options.
|
|
||||||
# Use a variable so options can be propagated to CUDA compiler.
|
|
||||||
|
|
||||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
|
||||||
CMAKE_BUILD_TYPE=RELEASE
|
|
||||||
# CMAKE_BUILD_TYPE=DEBUG
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Build for CUDA architecture:
|
|
||||||
|
|
||||||
# CUDA_ARCH=""
|
|
||||||
CUDA_ARCH="20"
|
|
||||||
# CUDA_ARCH="30"
|
|
||||||
# CUDA_ARCH="35"
|
|
||||||
|
|
||||||
# Build with OpenMP
|
|
||||||
|
|
||||||
OPENMP=ON
|
|
||||||
|
|
||||||
# Build host code with Intel compiler:
|
|
||||||
|
|
||||||
# INTEL=ON
|
|
||||||
|
|
||||||
# Build for MIC architecture:
|
|
||||||
|
|
||||||
# INTEL_XEON_PHI=ON
|
|
||||||
|
|
||||||
# Build with HWLOC at location:
|
|
||||||
|
|
||||||
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
|
|
||||||
|
|
||||||
# Location for MPI to use in examples:
|
|
||||||
|
|
||||||
MPI_BASE_DIR="/home/sems/common/openmpi/current"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# MPI configuration only used for examples:
|
|
||||||
#
|
|
||||||
# Must have the MPI_BASE_DIR so that the
|
|
||||||
# include path can be passed to the Cuda compiler
|
|
||||||
|
|
||||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread configuration:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP configuration:
|
|
||||||
|
|
||||||
if [ "${OPENMP}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure packages for kokkos-only:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${CUDA_ARCH}" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
|
||||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
|
||||||
|
|
||||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
|
||||||
|
|
||||||
if [ "${OPENMP}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
|
||||||
fi
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Cross-compile for Intel Xeon Phi:
|
|
||||||
|
|
||||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
|
||||||
|
|
||||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
|
|
||||||
# Tell cmake the answers to compile-and-execute tests
|
|
||||||
# to prevent cmake from executing a cross-compiled program.
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,190 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
# Additional command-line arguments given to this script will be
|
|
||||||
# passed directly to CMake.
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# Force CMake to re-evaluate build options.
|
|
||||||
#
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Incrementally construct cmake configure options:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Location of Trilinos source tree:
|
|
||||||
|
|
||||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
|
||||||
|
|
||||||
# Location for installation:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# General build options.
|
|
||||||
# Use a variable so options can be propagated to CUDA compiler.
|
|
||||||
|
|
||||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
|
||||||
CMAKE_BUILD_TYPE=RELEASE
|
|
||||||
# CMAKE_BUILD_TYPE=DEBUG
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Build for CUDA architecture:
|
|
||||||
|
|
||||||
# CUDA_ARCH=""
|
|
||||||
# CUDA_ARCH="20"
|
|
||||||
# CUDA_ARCH="30"
|
|
||||||
CUDA_ARCH="35"
|
|
||||||
|
|
||||||
# Build host code with Intel compiler:
|
|
||||||
|
|
||||||
INTEL=ON
|
|
||||||
|
|
||||||
# Build for MIC architecture:
|
|
||||||
|
|
||||||
# INTEL_XEON_PHI=ON
|
|
||||||
|
|
||||||
# Build with HWLOC at location:
|
|
||||||
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2"
|
|
||||||
|
|
||||||
# Location for MPI to use in examples:
|
|
||||||
|
|
||||||
MPI_BASE_DIR=""
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# MPI configuration only used for examples:
|
|
||||||
#
|
|
||||||
# Must have the MPI_BASE_DIR so that the
|
|
||||||
# include path can be passed to the Cuda compiler
|
|
||||||
|
|
||||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
|
||||||
else
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread configuration:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP configuration:
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure packages for kokkos-only:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda cmake configuration:
|
|
||||||
|
|
||||||
if [ -n "${CUDA_ARCH}" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
|
||||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
|
||||||
|
|
||||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
|
||||||
|
|
||||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
|
||||||
then
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
|
||||||
else
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
|
||||||
fi
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Cross-compile for Intel Xeon Phi:
|
|
||||||
|
|
||||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
|
||||||
then
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
|
||||||
|
|
||||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
|
|
||||||
# Tell cmake the answers to compile-and-execute tests
|
|
||||||
# to prevent cmake from executing a cross-compiled program.
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,140 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
#
|
|
||||||
# This script uses CUDA, OpenMP, and MPI.
|
|
||||||
#
|
|
||||||
# Before invoking this script, set the OMPI_CXX environment variable
|
|
||||||
# to point to nvcc_wrapper, wherever it happens to live. (If you use
|
|
||||||
# an MPI implementation other than OpenMPI, set the corresponding
|
|
||||||
# environment variable instead.)
|
|
||||||
#
|
|
||||||
|
|
||||||
rm -f CMakeCache.txt;
|
|
||||||
rm -rf CMakeFiles
|
|
||||||
EXTRA_ARGS=$@
|
|
||||||
MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5"
|
|
||||||
CUDA_PATH="/opt/nvidia/cuda/6.5.14"
|
|
||||||
|
|
||||||
#
|
|
||||||
# As long as there are any .cu files in Trilinos, we'll need to set
|
|
||||||
# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and
|
|
||||||
# lets nvcc_wrapper handle them as .cpp files, then we won't need to
|
|
||||||
# set CUDA_NVCC_FLAGS. As it is, given that we need to set
|
|
||||||
# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as
|
|
||||||
# nvcc_wrapper passes to nvcc.
|
|
||||||
#
|
|
||||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include"
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
|
||||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM"
|
|
||||||
|
|
||||||
cmake \
|
|
||||||
-D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \
|
|
||||||
-D CMAKE_BUILD_TYPE:STRING=DEBUG \
|
|
||||||
-D CMAKE_CXX_FLAGS:STRING="-g -Wall" \
|
|
||||||
-D CMAKE_C_FLAGS:STRING="-g -Wall" \
|
|
||||||
-D CMAKE_FORTRAN_FLAGS:STRING="" \
|
|
||||||
-D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \
|
|
||||||
-D Trilinos_ENABLE_Triutils=OFF \
|
|
||||||
-D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \
|
|
||||||
-D Trilinos_ENABLE_DEBUG:BOOL=OFF \
|
|
||||||
-D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \
|
|
||||||
-D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \
|
|
||||||
-D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \
|
|
||||||
-D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \
|
|
||||||
-D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \
|
|
||||||
-D BUILD_SHARED_LIBS:BOOL=OFF \
|
|
||||||
-D DART_TESTING_TIMEOUT:STRING=600 \
|
|
||||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
|
|
||||||
-D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
|
|
||||||
-D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
|
|
||||||
-D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
|
|
||||||
-D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \
|
|
||||||
-D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \
|
|
||||||
-D MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D Trilinos_ENABLE_CXX11:BOOL=OFF \
|
|
||||||
-D TPL_ENABLE_MPI:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_OpenMP:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_ThreadPool:BOOL=ON \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D TPL_ENABLE_CUDA:BOOL=ON \
|
|
||||||
-D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \
|
|
||||||
-D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \
|
|
||||||
-D TPL_ENABLE_Thrust:BOOL=OFF \
|
|
||||||
-D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \
|
|
||||||
-D TPL_ENABLE_CUSPARSE:BOOL=OFF \
|
|
||||||
-D TPL_ENABLE_Cusp:BOOL=OFF \
|
|
||||||
-D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \
|
|
||||||
-D CUDA_VERBOSE_BUILD:BOOL=OFF \
|
|
||||||
-D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS} \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D TPL_ENABLE_HWLOC=OFF \
|
|
||||||
-D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \
|
|
||||||
-D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \
|
|
||||||
-D TPL_ENABLE_BinUtils=OFF \
|
|
||||||
-D TPL_ENABLE_BLAS:STRING=ON \
|
|
||||||
-D TPL_ENABLE_LAPACK:STRING=ON \
|
|
||||||
-D TPL_ENABLE_MKL:STRING=OFF \
|
|
||||||
-D TPL_ENABLE_HWLOC:STRING=OFF \
|
|
||||||
-D TPL_ENABLE_GTEST:STRING=ON \
|
|
||||||
-D TPL_ENABLE_SuperLU=ON \
|
|
||||||
-D TPL_ENABLE_BLAS=ON \
|
|
||||||
-D TPL_ENABLE_LAPACK=ON \
|
|
||||||
-D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \
|
|
||||||
-D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D Trilinos_Enable_Kokkos:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_KokkosCore:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_KokkosContainers:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_TpetraKernels:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \
|
|
||||||
-D Trilinos_ENABLE_KokkosExample:BOOL=ON \
|
|
||||||
-D Kokkos_ENABLE_EXAMPLES:BOOL=ON \
|
|
||||||
-D Kokkos_ENABLE_TESTS:BOOL=OFF \
|
|
||||||
-D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \
|
|
||||||
-D TpetraClassic_ENABLE_OpenMPNode=OFF \
|
|
||||||
-D TpetraClassic_ENABLE_TPINode=OFF \
|
|
||||||
-D TpetraClassic_ENABLE_MKL=OFF \
|
|
||||||
-D Kokkos_ENABLE_Cuda_UVM=ON \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D Trilinos_ENABLE_Teuchos:BOOL=ON \
|
|
||||||
-D Teuchos_ENABLE_COMPLEX:BOOL=OFF \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D Trilinos_ENABLE_Tpetra:BOOL=ON \
|
|
||||||
-D Tpetra_ENABLE_KokkosCore=ON \
|
|
||||||
-D Tpetra_ENABLE_Kokkos_DistObject=OFF \
|
|
||||||
-D Tpetra_ENABLE_Kokkos_Refactor=ON \
|
|
||||||
-D Tpetra_ENABLE_TESTS=ON \
|
|
||||||
-D Tpetra_ENABLE_EXAMPLES=ON \
|
|
||||||
-D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \
|
|
||||||
\
|
|
||||||
\
|
|
||||||
-D Trilinos_ENABLE_Belos=OFF \
|
|
||||||
-D Trilinos_ENABLE_Amesos=OFF \
|
|
||||||
-D Trilinos_ENABLE_Amesos2=OFF \
|
|
||||||
-D Trilinos_ENABLE_Ifpack=OFF \
|
|
||||||
-D Trilinos_ENABLE_Ifpack2=OFF \
|
|
||||||
-D Trilinos_ENABLE_Epetra=OFF \
|
|
||||||
-D Trilinos_ENABLE_EpetraExt=OFF \
|
|
||||||
-D Trilinos_ENABLE_Zoltan=OFF \
|
|
||||||
-D Trilinos_ENABLE_Zoltan2=OFF \
|
|
||||||
-D Trilinos_ENABLE_MueLu=OFF \
|
|
||||||
-D Belos_ENABLE_TESTS=ON \
|
|
||||||
-D Belos_ENABLE_EXAMPLES=ON \
|
|
||||||
-D MueLu_ENABLE_TESTS=ON \
|
|
||||||
-D MueLu_ENABLE_EXAMPLES=ON \
|
|
||||||
-D Ifpack2_ENABLE_TESTS=ON \
|
|
||||||
-D Ifpack2_ENABLE_EXAMPLES=ON \
|
|
||||||
$EXTRA_ARGS \
|
|
||||||
${HOME}/Trilinos
|
|
||||||
|
|
||||||
@ -1,113 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# Cuda, OpenMP, Threads, Qthread, hwloc
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
|
|
||||||
#
|
|
||||||
# The 'nvcc-wrapper' module should load a script that matches
|
|
||||||
# kokkos/config/nvcc_wrapper
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality configuration:
|
|
||||||
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Qthread
|
|
||||||
|
|
||||||
QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,104 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# Cuda, OpenMP, hwloc
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
|
|
||||||
#
|
|
||||||
# The 'nvcc-wrapper' module should load a script that matches
|
|
||||||
# kokkos/config/nvcc_wrapper
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality configuration:
|
|
||||||
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread explicitly OFF so tribits doesn't automatically turn it on
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,88 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# Cuda
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
|
|
||||||
#
|
|
||||||
# The 'nvcc-wrapper' module should load a script that matches
|
|
||||||
# kokkos/config/nvcc_wrapper
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
|
|
||||||
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CMake's generated build file is spelled 'Makefile'; shell globs are
# case-sensitive, so use that spelling to really remove the stale file.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,84 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# C++11, OpenMP
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread explicitly OFF so tribits doesn't automatically activate it
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CMake's generated build file is spelled 'Makefile'; shell globs are
# case-sensitive, so use that spelling to really remove the stale file.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,78 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# <none>
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CMake's generated build file is spelled 'Makefile'; shell globs are
# case-sensitive, so use that spelling to really remove the stale file.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,89 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# Intel, OpenMP, Cuda
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread explicitly OFF
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CMake's generated build file is spelled 'Makefile'; shell globs are
# case-sensitive, so use that spelling to really remove the stale file.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,84 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# Intel, OpenMP
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 intel/13.SP1.1.106
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread explicitly OFF
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CMake's generated build file is spelled 'Makefile'; shell globs are
# case-sensitive, so use that spelling to really remove the stale file.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,77 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# OpenMP
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# OpenMP
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
|
||||||
|
|
||||||
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CMake's generated build file is spelled 'Makefile'; shell globs are
# case-sensitive, so use that spelling to really remove the stale file.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,87 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copy this script, put it outside the Trilinos source directory, and
|
|
||||||
# build there.
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
|
||||||
#
|
|
||||||
# Threads, hwloc
|
|
||||||
#
|
|
||||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
|
||||||
#
|
|
||||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
|
||||||
#
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Source and installation directories:
|
|
||||||
|
|
||||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
|
||||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE=""
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Debug/optimized
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Configure for Kokkos subpackages and tests:
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Hardware locality configuration:
|
|
||||||
|
|
||||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# Pthread
|
|
||||||
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
|
||||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
# C++11
|
|
||||||
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
# Remove CMake output files to force reconfigure from scratch.
|
|
||||||
#
|
|
||||||
|
|
||||||
# CMake's generated build file is spelled 'Makefile'; shell globs are
# case-sensitive, so use that spelling to really remove the stale file.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
|
|
||||||
|
|
||||||
#
|
|
||||||
|
|
||||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,257 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
#
|
|
||||||
# This shell script (nvcc_wrapper) wraps both the host compiler and
|
|
||||||
# NVCC, if you are building Trilinos with CUDA enabled. The script
|
|
||||||
# remedies some differences between the interface of NVCC and that of
|
|
||||||
# the host compiler, in particular for linking. It also means that
|
|
||||||
# Trilinos doesn't need separate .cu files; it can just use .cpp
|
|
||||||
# files.
|
|
||||||
#
|
|
||||||
# Hopefully, at some point, NVIDIA may fix NVCC so as to make this
|
|
||||||
# script obsolete. For now, this script exists and if you want to
|
|
||||||
# build Trilinos with CUDA enabled, you must use this script as your
|
|
||||||
# compiler.
|
|
||||||
|
|
||||||
# Default settings: change those according to your machine. For
|
|
||||||
# example, you may have two different wrappers with either icpc
|
|
||||||
# or g++ as their back-end compiler. The defaults can be overwritten
|
|
||||||
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
|
|
||||||
|
|
||||||
default_arch="sm_35"
|
|
||||||
#default_arch="sm_50"
|
|
||||||
|
|
||||||
#
|
|
||||||
# The default C++ compiler.
|
|
||||||
#
|
|
||||||
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
|
|
||||||
#host_compiler="icpc"
|
|
||||||
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
|
|
||||||
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
|
|
||||||
|
|
||||||
#
|
|
||||||
# Internal variables
|
|
||||||
#
|
|
||||||
|
|
||||||
# C++ files
|
|
||||||
cpp_files=""
|
|
||||||
|
|
||||||
# Host compiler arguments
|
|
||||||
xcompiler_args=""
|
|
||||||
|
|
||||||
# Cuda (NVCC) only arguments
|
|
||||||
cuda_args=""
|
|
||||||
|
|
||||||
# Arguments for both NVCC and Host compiler
|
|
||||||
shared_args=""
|
|
||||||
|
|
||||||
# Linker arguments
|
|
||||||
xlinker_args=""
|
|
||||||
|
|
||||||
# Object files passable to NVCC
|
|
||||||
object_files=""
|
|
||||||
|
|
||||||
# Link objects for the host linker only
|
|
||||||
object_files_xlinker=""
|
|
||||||
|
|
||||||
# Does the User set the architecture
|
|
||||||
arch_set=0
|
|
||||||
|
|
||||||
# Does the user overwrite the host compiler
|
|
||||||
ccbin_set=0
|
|
||||||
|
|
||||||
#Error code of compilation
|
|
||||||
error_code=0
|
|
||||||
|
|
||||||
# Do a dry run without actually compiling
|
|
||||||
dry_run=0
|
|
||||||
|
|
||||||
# Skip NVCC compilation and use host compiler directly
|
|
||||||
host_only=0
|
|
||||||
|
|
||||||
# Enable workaround for CUDA 6.5 for pragma ident
|
|
||||||
replace_pragma_ident=0
|
|
||||||
|
|
||||||
# Mark first host compiler argument
|
|
||||||
first_xcompiler_arg=1
|
|
||||||
|
|
||||||
temp_dir=${TMPDIR:-/tmp}
|
|
||||||
|
|
||||||
#echo "Arguments: $# $@"
|
|
||||||
|
|
||||||
while [ $# -gt 0 ]
|
|
||||||
do
|
|
||||||
case $1 in
|
|
||||||
#show the executed command
|
|
||||||
--show|--nvcc-wrapper-show)
|
|
||||||
dry_run=1
|
|
||||||
;;
|
|
||||||
#run host compilation only
|
|
||||||
--host-only)
|
|
||||||
host_only=1
|
|
||||||
;;
|
|
||||||
#replace '#pragma ident' with '#ident'; this is needed to compile OpenMPI due to a configure script bug and the non-standardized behaviour of pragma with macros
|
|
||||||
--replace-pragma-ident)
|
|
||||||
replace_pragma_ident=1
|
|
||||||
;;
|
|
||||||
#handle source files to be compiled as cuda files
|
|
||||||
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
|
|
||||||
cpp_files="$cpp_files $1"
|
|
||||||
;;
|
|
||||||
#Handle shared args (valid for both nvcc and the host compiler)
|
|
||||||
-O*|-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
|
|
||||||
shared_args="$shared_args $1"
|
|
||||||
;;
|
|
||||||
#Handle shared args that have an argument
|
|
||||||
-o)
|
|
||||||
shared_args="$shared_args $1 $2"
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
#Handle known nvcc args
|
|
||||||
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage)
|
|
||||||
cuda_args="$cuda_args $1"
|
|
||||||
;;
|
|
||||||
#Handle known nvcc args that have an argument
|
|
||||||
-rdc|-maxrregcount|--default-stream)
|
|
||||||
cuda_args="$cuda_args $1 $2"
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
#Handle c++11 setting
|
|
||||||
--std=c++11|-std=c++11)
|
|
||||||
shared_args="$shared_args $1"
|
|
||||||
;;
|
|
||||||
#strip off -std=c++98 because nvcc warns about it and Tribits may place both -std=c++11 and -std=c++98 on the command line
|
|
||||||
-std=c++98|--std=c++98)
|
|
||||||
;;
|
|
||||||
#strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
|
|
||||||
-pedantic|-Wpedantic|-ansi)
|
|
||||||
;;
|
|
||||||
#strip -Xcompiler because we add it
|
|
||||||
-Xcompiler)
|
|
||||||
;;
|
|
||||||
#strip off "-x cu" because we add that
|
|
||||||
-x)
|
|
||||||
if [[ $2 != "cu" ]]; then
|
|
||||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
|
||||||
xcompiler_args="-x,$2"
|
|
||||||
first_xcompiler_arg=0
|
|
||||||
else
|
|
||||||
xcompiler_args="$xcompiler_args,-x,$2"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
#Handle -ccbin (if it's not set we can set it to a default value)
|
|
||||||
-ccbin)
|
|
||||||
cuda_args="$cuda_args $1 $2"
|
|
||||||
ccbin_set=1
|
|
||||||
host_compiler=$2
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
#Handle -arch argument (if it's not set use a default)
|
|
||||||
-arch*)
|
|
||||||
cuda_args="$cuda_args $1"
|
|
||||||
arch_set=1
|
|
||||||
;;
|
|
||||||
#Handle -Xcudafe argument
|
|
||||||
-Xcudafe)
|
|
||||||
cuda_args="$cuda_args -Xcudafe $2"
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
#Handle args that should be sent to the linker
|
|
||||||
-Wl*)
|
|
||||||
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
|
|
||||||
host_linker_args="$host_linker_args ${1:4:${#1}}"
|
|
||||||
;;
|
|
||||||
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
|
|
||||||
*.a|*.so|*.o|*.obj)
|
|
||||||
object_files="$object_files $1"
|
|
||||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
|
||||||
;;
|
|
||||||
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
|
|
||||||
*.so.*|*.dylib)
|
|
||||||
object_files="$object_files -Xlinker $1"
|
|
||||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
|
||||||
;;
|
|
||||||
#All other args are sent to the host compiler
|
|
||||||
*)
|
|
||||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
|
||||||
xcompiler_args=$1
|
|
||||||
first_xcompiler_arg=0
|
|
||||||
else
|
|
||||||
xcompiler_args="$xcompiler_args,$1"
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
shift
|
|
||||||
done
|
|
||||||
|
|
||||||
#Add default host compiler if necessary
|
|
||||||
if [ $ccbin_set -ne 1 ]; then
|
|
||||||
cuda_args="$cuda_args -ccbin $host_compiler"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#Add architecture command
|
|
||||||
if [ $arch_set -ne 1 ]; then
|
|
||||||
cuda_args="$cuda_args -arch=$default_arch"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#Compose compilation command
|
|
||||||
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args"
|
|
||||||
if [ $first_xcompiler_arg -eq 0 ]; then
|
|
||||||
nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#Compose host only command
|
|
||||||
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args"
|
|
||||||
|
|
||||||
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
|
|
||||||
# When --replace-pragma-ident was given, every source file that contains a
# '#pragma ident' line is copied into $temp_dir with the pragma rewritten to
# '#ident', and the copy is compiled instead of the original. Files without
# such a pragma are passed through unchanged.
if [ $replace_pragma_ident -eq 1 ]; then
  cpp_files2=""
  for file in $cpp_files
  do
    var=`grep pragma ${file} | grep ident | grep "#"`
    if [ "${#var}" -gt 0 ]
    then
      # Use only the file name for the copy: $file may carry directory
      # components (src/foo.cpp, /abs/path/foo.cpp) that do not exist below
      # $temp_dir, which would make the redirection fail.
      tmp_file="$temp_dir/nvcc_wrapper_tmp_${file##*/}"
      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $tmp_file
      cpp_files2="$cpp_files2 $tmp_file"
    else
      cpp_files2="$cpp_files2 $file"
    fi
  done
  cpp_files=$cpp_files2
  #echo $cpp_files
fi
|
|
||||||
|
|
||||||
if [ "$cpp_files" ]; then
|
|
||||||
nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
|
|
||||||
else
|
|
||||||
nvcc_command="$nvcc_command $object_files"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$cpp_files" ]; then
|
|
||||||
host_command="$host_command $object_files $cpp_files"
|
|
||||||
else
|
|
||||||
host_command="$host_command $object_files"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#Print command for dryrun
|
|
||||||
if [ $dry_run -eq 1 ]; then
|
|
||||||
if [ $host_only -eq 1 ]; then
|
|
||||||
echo $host_command
|
|
||||||
else
|
|
||||||
echo $nvcc_command
|
|
||||||
fi
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
#Run compilation command
|
|
||||||
if [ $host_only -eq 1 ]; then
|
|
||||||
$host_command
|
|
||||||
else
|
|
||||||
$nvcc_command
|
|
||||||
fi
|
|
||||||
error_code=$?
|
|
||||||
|
|
||||||
#Report error code
|
|
||||||
exit $error_code
|
|
||||||
@ -1,279 +0,0 @@
|
|||||||
#! /usr/bin/env python
|
|
||||||
|
|
||||||
"""
|
|
||||||
Snapshot a project into another project and perform the necessary repo actions
|
|
||||||
to provide a commit message that can be used to trace back to the exact point
|
|
||||||
in the source repository.
|
|
||||||
"""
|
|
||||||
|
|
||||||
#todo:
|
|
||||||
# Support svn
|
|
||||||
# Allow renaming of the source dir in the destination path
|
|
||||||
# Check if a new snapshot is necessary?
|
|
||||||
#
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
#check the version number so that there is a good error message when argparse is not available.
|
|
||||||
#This checks for exactly 2.7 which is bad, but it is a python 2 script and argparse was introduced
|
|
||||||
#in 2.7 which is also the last version of python 2. If this script is updated for python 3 this
|
|
||||||
#will need to change, but for now it is not safe to allow 3.x to run this.
|
|
||||||
# Refuse to run on anything but Python 2.7 so that a missing argparse (or a
# Python 3 interpreter) yields a readable message instead of a traceback.
detected_version = tuple(sys.version_info[:2])
if detected_version != (2, 7):
  print("Error snapshot requires python 2.7 detected version is %d.%d." % detected_version)
  sys.exit(1)
|
|
||||||
|
|
||||||
import subprocess, argparse, re, doctest, os, datetime, traceback
|
|
||||||
|
|
||||||
def parse_cmdline(description):
  """Parse and validate the snapshot.py command line.

  description -- text shown by argparse in the --help output.

  Returns the argparse namespace after it has been passed through
  validate_options().
  """
  parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description)

  # "--no-comit" is the historical, misspelled flag; it is kept as an alias so
  # existing invocations keep working.
  parser.add_argument("-n", "--no-commit", "--no-comit", action="store_false", dest="create_commit", default=True,
                      help="Do not perform a commit or create a commit message.")
  parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False,
                      help="Enable verbose mode.")
  parser.add_argument("-d", "--debug", action="store_true", dest="debug_mode", default=False,
                      help="Enable debugging output.")
  parser.add_argument("--no-validate-repo", action="store_true", dest="no_validate_repo", default=False,
                      help="Reduce the validation that the source and destination repos are clean to a warning.")
  parser.add_argument("--source-repo", choices=["git","none"], default="",
                      help="Type of repository of the source, use none to skip all repository operations.")
  parser.add_argument("--dest-repo", choices=["git","none"], default="",
                      help="Type of repository of the destination, use none to skip all repository operations.")

  parser.add_argument("source", help="Source project to snapshot from.")
  parser.add_argument("destination", help="Destination to snapshot to.")

  options = parser.parse_args()
  options = validate_options(options)
  return options
#end parseCmdline
|
|
||||||
|
|
||||||
def validate_options(options):
  """Normalize the parsed options and resolve the repository types.

  Makes source and destination absolute, creates the destination if it is
  missing, stores the detected repository roots as options.source_root and
  options.dest_root, and fills in source_repo / dest_repo when they were not
  given on the command line.

  Raises RuntimeError when the source does not exist, when either side is an
  svn checkout, or when an explicitly requested repository type disagrees with
  the detected one. Returns the updated options object.
  """
  # A trailing separator changes how rsync interprets a path, so drop it
  # before making the paths absolute.
  options.source = os.path.abspath(options.source.rstrip(os.sep))
  options.destination = os.path.abspath(options.destination.rstrip(os.sep))

  if not os.path.exists(options.source):
    raise RuntimeError("Could not find source directory of %s." % options.source)
  detected_source_type, options.source_root = deterimine_repo_type(options.source)

  if not os.path.exists(options.destination):
    print("Could not find destination directory of %s so it will be created." % options.destination)
    os.makedirs(options.destination)
  detected_dest_type, options.dest_root = deterimine_repo_type(options.destination)

  # svn is detected but not supported yet.
  if "svn" in (detected_source_type, detected_dest_type):
    raise RuntimeError("SVN repositories are not supported at this time.")

  if options.source_repo == "":
    # No source repo type was specified, so use the detected one.
    options.source_repo = detected_source_type
  elif options.source_repo not in ("none", detected_source_type):
    raise RuntimeError("Specified source repository type of %s conflicts with determined type of %s" %
                       (options.source_repo, detected_source_type))

  if options.dest_repo == "":
    # No destination repo type was specified, so use the detected one.
    options.dest_repo = detected_dest_type
  elif options.dest_repo not in ("none", detected_dest_type):
    raise RuntimeError("Specified destination repository type of %s conflicts with determined type of %s" %
                       (options.dest_repo, detected_dest_type))

  return options
#end validate_options
|
|
||||||
|
|
||||||
def run_cmd(cmd, options, working_dir="."):
  """Run a command and return its captured (stdout, stderr).

  cmd         -- argument list handed to subprocess.Popen.
  options     -- object providing verbose_mode and debug_mode flags.
  working_dir -- directory the command is executed in.

  Raises RuntimeError carrying both output streams when the command exits
  with a non-zero status.
  """
  cmd_str = " ".join(cmd)
  if options.verbose_mode:
    print("Running command '%s' in dir %s." % (cmd_str, working_dir))

  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir)
  proc_stdout, proc_stderr = proc.communicate()
  # communicate() has already waited for the process to finish.
  ret_val = proc.returncode

  if options.debug_mode:
    print("==== %s stdout start ====" % cmd_str)
    print(proc_stdout)
    print("==== %s stdout end ====" % cmd_str)
    print("==== %s stderr start ====" % cmd_str)
    print(proc_stderr)
    print("==== %s stderr end ====" % cmd_str)

  if ret_val != 0:
    raise RuntimeError("Command '%s' failed with error code %d. Error message:%s%s%sstdout:%s" %
                       (cmd_str, ret_val, os.linesep, proc_stderr, os.linesep, proc_stdout))

  return proc_stdout, proc_stderr
#end run_cmd
|
|
||||||
|
|
||||||
def deterimine_repo_type(location):
  """Find the repository that contains location.

  Walks from location up through its parent directories looking for a ".git"
  or ".svn" entry.

  Returns (repo_type, repo_root): repo_type is "git", "svn" or "none";
  repo_root is the directory holding the marker, or "" when none was found.
  """
  while location != "":
    if os.path.exists(os.path.join(location, ".git")):
      return "git", location
    if os.path.exists(os.path.join(location, ".svn")):
      return "svn", location

    # Step to the real parent directory. Slicing at the last separator would
    # chop single characters off a path that has no separator and probe
    # unrelated directories.
    parent = os.path.dirname(location)
    if parent == location:
      # Reached the filesystem root without finding a repository.
      break
    location = parent

  return "none", ""

#end deterimine_repo_type
|
|
||||||
|
|
||||||
def rsync(source, dest, options):
  """Mirror options.source into options.destination with rsync.

  The source and dest parameters are not read; the paths come from options.
  Adds -v in debug mode and excludes .git when the source is a git repository.
  """
  extra_flags = []
  if options.debug_mode:
    extra_flags.append("-v")
  if options.source_repo == "git":
    extra_flags.append("--exclude=.git")

  rsync_cmd = ["rsync", "-ar", "--delete"] + extra_flags + [options.source, options.destination]
  run_cmd(rsync_cmd, options)
#end rsync
|
|
||||||
|
|
||||||
def create_commit_message(commit_id, commit_log, project_name, project_location):
  """Build the snapshot commit message.

  The message has three parts separated by blank lines: a summary naming the
  project and commit id, the repository location, and the source commit log.
  """
  blank_line = os.linesep * 2
  parts = [
    "Snapshot of %s from commit %s" % (project_name, commit_id),
    "From repository at %s" % project_location,
    "At commit:" + os.linesep + commit_log,
  ]
  return blank_line.join(parts)
#end create_commit_message
|
|
||||||
|
|
||||||
def find_git_commit_information(options):
  r"""
  Return (commit_id, commit_log, source_name, source_location) for the git
  repository at options.source.

  commit_id and commit_log come from "git log -1"; source_location is the URL
  of the "origin" remote and source_name is its last path component.

  Raises RuntimeError when the commit id cannot be parsed or when no origin
  remote is configured.

  >>> class fake_options:
  ...   source="."
  ...   verbose_mode=False
  ...   debug_mode=False
  >>> myoptions = fake_options()
  >>> find_git_commit_information(myoptions)[2:]
  ('sems', 'software.sandia.gov:/git/sems')
  """
  git_log_cmd = ["git", "log", "-1"]

  output, error = run_cmd(git_log_cmd, options, options.source)

  commit_match = re.match(r"commit ([0-9a-fA-F]+)", output)
  if not commit_match:
    # Without this check an unexpected log format surfaces as an
    # AttributeError on None.
    raise RuntimeError("Could not determine the current commit of repo at %s." % (options.source))
  commit_id = commit_match.group(1)
  commit_log = output

  git_remote_cmd = ["git", "remote", "-v"]
  output, error = run_cmd(git_remote_cmd, options, options.source)

  remote_match = re.search(r"origin\s([^ ]*/([^ ]+))", output, re.MULTILINE)
  if not remote_match:
    raise RuntimeError("Could not find origin of repo at %s. Consider using none for source repo type." % (options.source))

  source_location = remote_match.group(1)
  source_name = remote_match.group(2).strip()

  # A URL ending in "/" leaves the slash on the captured name.
  if source_name.endswith("/"):
    source_name = source_name[:-1]

  return commit_id, commit_log, source_name, source_location

#end find_git_commit_information
|
|
||||||
|
|
||||||
def do_git_commit(message, options):
  """Stage everything in options.destination and commit it with message.

  Prints the abbreviated hash of the new commit together with
  options.dest_root.
  """
  if options.verbose_mode:
    print("Commiting to destination repository.")

  dest_dir = options.destination
  for git_cmd in (["git", "add", "-A"], ["git", "commit", "-m%s" % message]):
    run_cmd(git_cmd, options, dest_dir)

  commit_sha1, error = run_cmd(["git", "log", "--format=%h", "-1"], options, dest_dir)

  print("Commit %s was made to %s." % (commit_sha1.strip(), options.dest_root))
#end do_git_commit
|
|
||||||
|
|
||||||
def verify_git_repo_clean(location, options):
  """Check that the git working tree at location has no pending changes.

  A dirty tree raises RuntimeError unless options.no_validate_repo is set, in
  which case warnings are printed instead.
  """
  output, error = run_cmd(["git", "status", "--porcelain"], options, location)

  if output == "":
    return

  if options.no_validate_repo == False:
    raise RuntimeError("%s is not clean.%sPlease commit or stash all changes before running snapshot."
                       % (location, os.linesep))

  print("WARNING: %s is not clean. Proceeding anyway." % location)
  print("WARNING: This could lead to differences in the source and destination.")
  print("WARNING: It could also lead to extra files being included in the snapshot commit.")

#end verify_git_repo_clean
|
|
||||||
|
|
||||||
def main(options):
  """Snapshot options.source into options.destination.

  Steps: verify the source repository and collect its commit information,
  verify the destination repository, rsync the files, then either commit to
  the destination git repository or write the commit message to
  snapshot_message.txt in the current directory.

  When options.create_commit is False (-n) the files are still copied, but no
  commit is made and no message file is written.
  """
  if options.verbose_mode:
    print("Snapshotting %s to %s." % (options.source, options.destination))

  if options.source_repo == "git":
    verify_git_repo_clean(options.source, options)
    commit_id, commit_log, repo_name, repo_location = find_git_commit_information(options)
  elif options.source_repo == "none":
    commit_id = "N/A"
    commit_log = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now())
    repo_name = options.source
    repo_location = options.source

  commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2

  if options.dest_repo == "git":
    verify_git_repo_clean(options.destination, options)

  rsync(options.source, options.destination, options)

  # Honor -n: the option was parsed but previously never consulted, so a
  # commit was made regardless. getattr keeps option objects that lack the
  # attribute working as before.
  if not getattr(options, "create_commit", True):
    if options.verbose_mode:
      print("Skipping commit and commit message by request.")
    return

  if options.dest_repo == "git":
    do_git_commit(commit_message, options)
  elif options.dest_repo == "none":
    file_name = "snapshot_message.txt"
    # The with-block closes the file even if the write fails.
    with open(file_name, "w") as message_file:
      message_file.write(commit_message)
    cwd = os.getcwd()
    print("No commit done by request. Please use file at:")
    print("%s%sif you wish to commit this to a repo later." % (cwd+"/"+file_name, os.linesep))

#end main
|
|
||||||
|
|
||||||
# Script entry point: "--test" runs the doctests, otherwise the snapshot is
# performed. The exit status is 1 when a RuntimeError was reported, else 0.
if __name__ == "__main__":
  if "--test" in sys.argv:
    doctest.testmod()
    sys.exit(0)

  exit_status = 0
  try:
    options = parse_cmdline(__doc__)
    main(options)
  except RuntimeError as e:
    print("Error occured: %s" % e)
    if "--debug" in sys.argv:
      traceback.print_exc()
    exit_status = 1
  sys.exit(exit_status)
|
|
||||||
@ -1,437 +0,0 @@
|
|||||||
#!/bin/bash -e
|
|
||||||
|
|
||||||
#
|
|
||||||
# Global config
|
|
||||||
#
|
|
||||||
|
|
||||||
set -o pipefail
|
|
||||||
|
|
||||||
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
|
||||||
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
|
||||||
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
|
||||||
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
|
|
||||||
|
|
||||||
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
|
|
||||||
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
|
||||||
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
|
||||||
CUDA_WARNING_FLAGS=""
|
|
||||||
|
|
||||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
|
|
||||||
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"
|
|
||||||
|
|
||||||
export OMP_NUM_THREADS=4
|
|
||||||
|
|
||||||
declare -i NUM_RESULTS_TO_KEEP=7
|
|
||||||
|
|
||||||
RESULT_ROOT_PREFIX=TestAll
|
|
||||||
|
|
||||||
source /projects/modulefiles/utils/sems-modules-init.sh
|
|
||||||
source /projects/modulefiles/utils/kokkos-modules-init.sh
|
|
||||||
|
|
||||||
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
|
|
||||||
|
|
||||||
#
|
|
||||||
# Handle arguments
|
|
||||||
#
|
|
||||||
|
|
||||||
DEBUG=False
|
|
||||||
ARGS=""
|
|
||||||
CUSTOM_BUILD_LIST=""
|
|
||||||
DRYRUN=False
|
|
||||||
BUILD_ONLY=False
|
|
||||||
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
|
|
||||||
TEST_SCRIPT=False
|
|
||||||
|
|
||||||
# Parse the command line. Options of the form --name[=value] set the
# corresponding configuration variable; --help prints the usage text and
# exits; every other word is collected in ARGS as a compiler expression.
# The loop condition uses a numeric test: '>' inside [[ ]] compares strings.
while [ $# -gt 0 ]
do
  key="$1"
  case $key in
    --kokkos-path*)
      KOKKOS_PATH="${key#*=}"
      ;;
    --build-list*)
      CUSTOM_BUILD_LIST="${key#*=}"
      ;;
    --debug*)
      DEBUG=True
      ;;
    --build-only*)
      BUILD_ONLY=True
      ;;
    --test-script*)
      TEST_SCRIPT=True
      ;;
    --num*)
      NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
      ;;
    --dry-run*)
      DRYRUN=True
      ;;
    --help)
      # One argument per output line; the strings are arguments, not the
      # printf format, so '%' and leading '--' are printed literally.
      printf '%s\n' \
        "test_all_sandia <ARGS> <OPTIONS>:" \
        "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" \
        " Defaults to root repo containing this script" \
        "--debug: Run tests in debug. Defaults to False" \
        "--test-script: Test this script, not Kokkos" \
        "--num=N: Number of jobs to run in parallel " \
        "--dry-run: Just print what would be executed" \
        "--build-only: Just do builds, don't run anything" \
        "--build-list=BUILD,BUILD,BUILD..." \
        " Provide a comma-separated list of builds instead of running all builds" \
        " Valid items:" \
        " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" \
        " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" \
        "" \
        "ARGS: list of expressions matching compilers to test" \
        " supported compilers" \
        " gcc/4.7.2" \
        " gcc/4.8.4" \
        " gcc/4.9.2" \
        " gcc/5.1.0" \
        " intel/14.0.4" \
        " intel/15.0.2" \
        " clang/3.5.2" \
        " clang/3.6.1" \
        " cuda/6.5.14" \
        " cuda/7.0.28" \
        " cuda/7.5.18" \
        "" \
        "Examples:" \
        " Run all tests" \
        " % test_all_sandia" \
        "" \
        " Run all gcc tests" \
        " % test_all_sandia gcc" \
        "" \
        " Run all gcc/4.7.2 and all intel tests" \
        " % test_all_sandia gcc/4.7.2 intel" \
        "" \
        " Run all tests in debug" \
        " % test_all_sandia --debug" \
        "" \
        " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" \
        " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial" \
        "" \
        "If you want to kill the tests, do:" \
        " hit ctrl-z" \
        " % kill -9 %1" \
        ""
      exit 0
      ;;
    *)
      # args, just append
      ARGS="$ARGS $1"
      ;;
  esac
  shift
done
|
|
||||||
|
|
||||||
# Resolve the Kokkos root: default to the repository containing this script,
# otherwise turn the user-supplied path into an absolute one. The path is
# quoted so that directories containing spaces work.
if [ -z "$KOKKOS_PATH" ]; then
  KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
  # Ensure KOKKOS_PATH is abs path
  KOKKOS_PATH=$( cd "$KOKKOS_PATH" && pwd )
fi

# Map the --debug flag onto the build type name.
if [ "$DEBUG" = "True" ]; then
  BUILD_TYPE=debug
else
  BUILD_TYPE=release
fi

# If no args provided, do all compilers ('?' is used as a glob prefix when
# the compilers are selected, so it matches every compiler name).
if [ -z "$ARGS" ]; then
  ARGS='?'
fi
|
|
||||||
|
|
||||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
|
||||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
|
||||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
|
||||||
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
|
||||||
"gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
|
||||||
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
|
||||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
|
||||||
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
|
||||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
|
||||||
"cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
|
||||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
|
||||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Build COMPILERS_TO_TEST: every compiler whose name starts with one of the
# expressions in ARGS is added once; a second match only prints a notice.
COMPILERS_TO_TEST=""
for ARG in $ARGS; do
  for COMPILER_DATA in "${COMPILERS[@]}"; do
    ARR=($COMPILER_DATA)
    COMPILER=${ARR[0]}

    # Skip entries whose name does not begin with the current expression.
    [[ "$COMPILER" = $ARG* ]] || continue

    if [[ "$COMPILERS_TO_TEST" == *${COMPILER}* ]]; then
      echo "Tried to add $COMPILER twice"
    else
      COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
    fi
  done
done
|
|
||||||
|
|
||||||
#
|
|
||||||
# Functions
|
|
||||||
#
|
|
||||||
|
|
||||||
# get_compiler_name <COMPILER>
# Prints the part of <COMPILER> before the first '/', e.g. "gcc" for gcc/4.7.2.
get_compiler_name() {
  local spec=$1
  echo $spec | cut -d / -f 1
}
|
|
||||||
|
|
||||||
# get_compiler_version <COMPILER>
# Prints the second '/'-separated field of <COMPILER>, e.g. "4.7.2" for gcc/4.7.2.
get_compiler_version() {
  local spec=$1
  echo $spec | cut -d / -f 2
}
|
|
||||||
|
|
||||||
# Do not call directly
# get_compiler_data <COMPILER> <ITEM>
# Looks up <COMPILER> in the COMPILERS table and prints field <ITEM> of its
# entry, with commas turned into spaces and the <COMPILER_NAME> /
# <COMPILER_VERSION> placeholders substituted. Prints an error to stderr and
# exits with status 1 when the compiler is not in the table.
get_compiler_data() {
  local compiler=$1
  local item=$2
  local compiler_name=$(get_compiler_name $compiler)
  local compiler_vers=$(get_compiler_version $compiler)

  local compiler_data
  for compiler_data in "${COMPILERS[@]}" ; do
    # Split the table entry on whitespace into its fields.
    local arr=($compiler_data)
    if [ "$compiler" = "${arr[0]}" ]; then
      echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g"
      return 0
    fi
  done

  # Not found
  echo "Unrecognized compiler $compiler" >&2
  exit 1
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# For all getters, usage: <GETTER> <COMPILER>
|
|
||||||
#
|
|
||||||
|
|
||||||
# Prints field 1 (the module list) of the COMPILERS entry for <COMPILER>.
get_compiler_modules() {
  local -r field=1
  get_compiler_data $1 $field
}
|
|
||||||
|
|
||||||
# Prints field 2 (the build list) of the COMPILERS entry for <COMPILER>.
get_compiler_build_list() {
  local -r field=2
  get_compiler_data $1 $field
}
|
|
||||||
|
|
||||||
# Prints field 3 (the compiler executable) of the COMPILERS entry for <COMPILER>.
get_compiler_exe_name() {
  local -r field=3
  get_compiler_data $1 $field
}
|
|
||||||
|
|
||||||
# Prints field 4 (the warning flags) of the COMPILERS entry for <COMPILER>.
get_compiler_warning_flags() {
  local -r field=4
  get_compiler_data $1 $field
}
|
|
||||||
|
|
||||||
# run_cmd <COMMAND...>
# Echoes the command and, unless DRYRUN is "True", evaluates it with stderr
# merged into stdout.
run_cmd() {
  echo "RUNNING: $*"
  [ "$DRYRUN" = "True" ] && return 0
  eval "$* 2>&1"
}
|
|
||||||
|
|
||||||
# report_and_log_test_result <SUCCESS> <DESC> <PHASE>
# SUCCESS "0": prints a PASSED line and creates $PASSED_DIR/<DESC>.
# Otherwise: prints a FAILED line to stderr, records <PHASE> in
# $FAILED_DIR/<DESC> and dumps <DESC>.<PHASE>.log from the current directory.
report_and_log_test_result() {
  local success=$1
  local desc=$2
  local phase=$3

  if [ "$success" != "0" ]; then
    echo " FAILED $desc" >&2
    echo $phase > $FAILED_DIR/$desc
    cat ${desc}.${phase}.log
    return
  fi

  echo " PASSED $desc"
  touch $PASSED_DIR/$desc
}
|
|
||||||
|
|
||||||
# setup_env <COMPILER>
# Purges the loaded modules and loads those listed for <COMPILER>.
# Returns 1 as soon as a module does not show up in "module list", else 0.
setup_env() {
  local compiler=$1
  local compiler_modules=$(get_compiler_modules $compiler)

  module purge

  local mod
  for mod in $compiler_modules; do
    module load $mod 2>&1
    # It is ridiculously hard to check for the success of a loaded
    # module. Module does not return error codes and piping to grep
    # causes module to run in a subshell.
    if ! module list 2>&1 | grep "$mod" >& /dev/null; then
      return 1
    fi
  done

  return 0
}
|
|
||||||
|
|
||||||
# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
# Configures, builds and (unless BUILD_ONLY is True) tests one
# compiler/build/build-type combination in its own directory below $ROOT_DIR.
# A failing phase is reported through report_and_log_test_result together with
# that phase's log; the function itself returns 0 after such a report.
# With TEST_SCRIPT=True no real build is done: the job sleeps 1-10 seconds
# and fails on purpose when the delay was more than 5.
single_build_and_test() {
  # Use sane var names
  local compiler=$1; local build=$2; local build_type=$3;

  # set up env
  mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type"
  cd $ROOT_DIR/$compiler/"${build}-$build_type"
  local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
  setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }

  # Set up flags
  local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
  local compiler_exe=$(get_compiler_exe_name $compiler)

  # Declare extra_args up front so that a same-named variable inherited from
  # the caller cannot leak into the configure command.
  local extra_args=""
  if [[ "$build_type" = hwloc* ]]; then
    local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
  fi

  local cxxflags
  if [[ "$build_type" = *debug* ]]; then
    extra_args="$extra_args --debug"
    cxxflags="-g $compiler_warning_flags"
  else
    cxxflags="-O3 $compiler_warning_flags"
  fi

  if [[ "$compiler" == cuda* ]]; then
    # Keep the intermediate files of the cuda build in the build directory.
    cxxflags="--keep --keep-dir=$(pwd) $cxxflags"
    export TMPDIR=$(pwd)
  fi

  # cxxflags="-DKOKKOS_USING_EXPERIMENTAL_VIEW $cxxflags"

  echo " Starting job $desc"

  if [ "$TEST_SCRIPT" = "True" ]; then
    # $(( )) replaces the deprecated $[ ] arithmetic form; value is 1..10.
    local rand=$(( 1 + RANDOM % 10 ))
    sleep $rand
    if [ $rand -gt 5 ]; then
      run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
    fi
  else
    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
    run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
    if [[ "$BUILD_ONLY" == False ]]; then
      run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; }
    fi
  fi

  report_and_log_test_result 0 $desc

  return 0
}
|
|
||||||
|
|
||||||
# wait_for_jobs <NUM-JOBS>
|
|
||||||
wait_for_jobs() {
  # Block, polling once per second, until the number of lines printed by
  # `jobs` is below the limit given as $1.
  local -i max_jobs=$1
  local -i num_active_jobs=$(jobs | wc -l)
  until [ $num_active_jobs -lt $max_jobs ]; do
    sleep 1
    num_active_jobs=$(jobs | wc -l)
    # NOTE(review): presumably lets this shell drop finished jobs from its
    # job table; the output itself is discarded.
    jobs > /dev/null 2>&1
  done
}
|
|
||||||
|
|
||||||
# run_in_background <COMPILER> <BUILD> <BUILD_TYPE>
|
|
||||||
run_in_background() {
  # Start single_build_and_test with the given arguments as a background job,
  # first waiting until fewer than the allowed number of jobs are running.
  # Limit: 8 when BUILD_ONLY is True, otherwise 1 for cuda* compilers and
  # NUM_JOBS_TO_RUN_IN_PARALLEL for everything else.
  local compiler=$1

  local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
  if [[ "$BUILD_ONLY" == True ]]; then
    num_jobs=8
  elif [[ "$compiler" == cuda* ]]; then
    num_jobs=1
  fi
  wait_for_jobs $num_jobs

  single_build_and_test $* &
}
|
|
||||||
|
|
||||||
# build_and_test_all <COMPILER>
|
|
||||||
build_and_test_all() {
  # Schedule every build for the compiler given as $1. The build list is the
  # comma-separated CUSTOM_BUILD_LIST when that is set, otherwise whatever
  # get_compiler_build_list reports for the compiler.
  local compiler=$1
  if [ -n "$CUSTOM_BUILD_LIST" ]; then
    local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
  else
    local compiler_build_list=$(get_compiler_build_list $compiler)
  fi

  local build
  for build in $compiler_build_list; do
    run_in_background $compiler $build $BUILD_TYPE

    # Every non-cuda compiler additionally gets an hwloc variant.
    if [[ "$compiler" != cuda* ]]; then
      run_in_background $compiler $build "hwloc-$BUILD_TYPE"
    fi
  done

  return 0
}
|
|
||||||
|
|
||||||
get_test_root_dir() {
  # Remove the oldest result directories (by sorted name) in the current
  # directory so that at most NUM_RESULTS_TO_KEEP remain, then print the path
  # for a new, timestamped result directory. The directory is not created here.
  local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort)
  local one_per_line=$(echo $existing_results | tr ' ' '\n')
  local -i num_existing_results=$(echo "$one_per_line" | wc -l)
  local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP}

  if (( num_to_delete > 0 )); then
    /bin/rm -rf $(echo "$one_per_line" | head -n $num_to_delete)
  fi

  local stamp=$(date +"%Y-%m-%d_%H.%M.%S")
  echo $(pwd)/${RESULT_ROOT_PREFIX}_${stamp}
}
|
|
||||||
|
|
||||||
wait_summarize_and_exit() {
  # Wait for all background jobs, print the passed and failed test names, and
  # exit with the number of failed tests (capped at 255).
  wait_for_jobs 1

  echo "#######################################################"
  echo "PASSED TESTS"
  echo "#######################################################"

  \ls -1 $PASSED_DIR | sort

  echo "#######################################################"
  echo "FAILED TESTS"
  echo "#######################################################"

  local failed_test
  local -i rv=0
  for failed_test in $(\ls -1 $FAILED_DIR)
  do
    # Each file in FAILED_DIR is named after the test and holds the phase
    # that failed.
    echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
    rv=$rv+1
  done

  # Exit statuses are taken modulo 256; without the cap, 256 failures would
  # be reported to the caller as success.
  if [ $rv -gt 255 ]; then
    rv=255
  fi

  exit $rv
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# Main
|
|
||||||
#
|
|
||||||
|
|
||||||
# Create a fresh result tree and work from inside it.
ROOT_DIR=$(get_test_root_dir)
mkdir -p $ROOT_DIR
cd $ROOT_DIR

# One marker file per finished test lands in one of these two directories.
PASSED_DIR=$ROOT_DIR/results/passed
FAILED_DIR=$ROOT_DIR/results/failed
mkdir -p $PASSED_DIR
mkdir -p $FAILED_DIR

echo "Going to test compilers: " $COMPILERS_TO_TEST
for COMPILER in $COMPILERS_TO_TEST
do
  echo "Testing compiler $COMPILER"
  build_and_test_all $COMPILER
done

# Blocks until every job is done; the exit status reflects the failures.
wait_summarize_and_exit
|
|
||||||
@ -1,5 +0,0 @@
|
|||||||
jenkins_test_driver is designed to be run through Jenkins as a
|
|
||||||
multiconfiguration job. It relies on a number of environment variables that will
|
|
||||||
only be set when run in that context. It is possible to override these if you
|
|
||||||
know the Jenkins job setup. It is not recommended that a non-expert try to run
|
|
||||||
this script directly.
|
|
||||||
@ -1,83 +0,0 @@
|
|||||||
#!/bin/bash -x
#
# Configure, build and test Kokkos for one Jenkins configuration.
# Reads from the environment:
#   BUILD_TYPE    - '~'-separated list; each item becomes a --with-<item>
#                   option, and an item matching cuda_* selects the CUDA module
#   HOST_COMPILER - module name with '_' in place of '/'; its leading letters
#                   give the compiler brand (gcc or intel)
#   WORKSPACE     - directory containing the kokkos checkout

echo "Building for BUILD_TYPE = ${BUILD_TYPE}"
echo "Building with HOST_COMPILER = ${HOST_COMPILER}"
echo "Building in ${WORKSPACE}"

module use /home/projects/modulefiles

BUILD_TYPE=`echo $BUILD_TYPE | tr "~" " "`
build_options=""
for item in ${BUILD_TYPE}; do
  build_options="$build_options --with-$item"
done

kokkos_path=${WORKSPACE}/kokkos
gtest_path=${WORKSPACE}/kokkos/tpls/gtest

echo ${WORKSPACE}
pwd

#extract information from the provided parameters.
host_compiler_brand=`echo $HOST_COMPILER | grep -o "^[a-zA-Z]*"`
cuda_compiler=`echo $BUILD_TYPE | grep -o "cuda_[^ ]*"`

host_compiler_module=`echo $HOST_COMPILER | tr "_" "/"`
cuda_compiler_module=`echo $cuda_compiler | tr "_" "/"`
build_path=`echo $BUILD_TYPE | tr " " "_"`

module load $host_compiler_module
# Only load a CUDA module when the build type actually names one; a bare
# `module load` has nothing to load.
if [ -n "$cuda_compiler" ]; then
  module load $cuda_compiler_module
fi

case $host_compiler_brand in
  gcc)
    module load nvcc-wrapper/gnu
    compiler=g++
    ;;
  intel)
    module load nvcc-wrapper/intel
    compiler=icpc
    ;;
  *)
    echo "Unrecognized compiler brand."
    exit 1
    ;;
esac

#if cuda is on we need to set the host compiler for the
#nvcc wrapper and make the wrapper the compiler.
# The variable is quoted so the test stays well-formed when it is empty.
if [ -n "$cuda_compiler" ]; then
  export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler
  compiler=$kokkos_path/config/nvcc_wrapper
fi

if [ "$host_compiler_brand" == "intel" ] && [ -n "$cuda_compiler" ]; then
  echo "Intel compilers are not supported with cuda at this time."
  exit 0
fi

rm -rf test-$build_path
mkdir test-$build_path
cd test-$build_path

/bin/bash $kokkos_path/generate_makefile.bash $build_options --kokkos-path="$kokkos_path" --with-gtest="$gtest_path" --compiler=$compiler 2>&1 |tee configure.out

# PIPESTATUS[0] is the status of the command feeding tee, not of tee itself.
if [ ${PIPESTATUS[0]} != 0 ]; then
  echo "Configure failed."
  exit 1
fi

make build-test 2>&1 | tee build.log

if [ ${PIPESTATUS[0]} != 0 ]; then
  echo "Build failed."
  exit 1
fi

make test 2>&1 | tee test.log

# Test failures are detected from the log text rather than make's status.
grep "FAIL" test.log
if [ $? == 0 ]; then
  echo "Tests failed."
  exit 1
fi
|
|
||||||
@ -1,287 +0,0 @@
|
|||||||
#! /usr/bin/env python
|
|
||||||
|
|
||||||
"""
|
|
||||||
Compute the size at which the current compiler will start to
|
|
||||||
significantly scale back optimization.
|
|
||||||
|
|
||||||
The CPP file being modified will need the following tags.
|
|
||||||
// JGF_DUPLICATE_BEGIN - Put before start of function to duplicate
|
|
||||||
// JGF_DUPLICATE_END - Put after end of function to duplicate
|
|
||||||
// JGF_DUPE: function_name(args); - Put anywhere where it's legal to
|
|
||||||
put a function call but not in your timing section.
|
|
||||||
|
|
||||||
The program will need to output the string:
|
|
||||||
FOM: <number>
|
|
||||||
This will represent the program's performance
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse, sys, os, doctest, subprocess, re, time
|
|
||||||
|
|
||||||
VERBOSE = False
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def parse_command_line(args, description):
###############################################################################
    """
    Parse an argv-style list (args[0] is the program name).

    Returns the tuple (cppfile, buildcmd, execmd, start, end, repeat,
    template, csv). Sets the module-level VERBOSE flag when --verbose is
    given.
    """
    global VERBOSE

    prog = os.path.basename(args[0])
    usage_text = """\n%s <cppfile> <build-command> <run-command> [--verbose]
OR
%s --help
OR
%s --test

\033[1mEXAMPLES:\033[0m
> %s foo.cpp 'make -j4' foo
""" % ((prog, ) * 4)

    cli = argparse.ArgumentParser(
        usage=usage_text,
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Positional arguments
    cli.add_argument("cppfile", help="Name of file to modify.")
    cli.add_argument("buildcmd", help="Build command")
    cli.add_argument("execmd", help="Run command")

    # Options
    cli.add_argument("-v", "--verbose", action="store_true",
                     help="Print extra information")
    cli.add_argument("-s", "--start", type=int, default=1,
                     help="Starting number of dupes")
    cli.add_argument("-e", "--end", type=int, default=1000,
                     help="Ending number of dupes")
    cli.add_argument("-n", "--repeat", type=int, default=10,
                     help="Number of times to repeat an individial execution. Best value will be taken.")
    cli.add_argument("-t", "--template", action="store_true",
                     help="Use templating instead of source copying to increase object size")
    cli.add_argument("-c", "--csv", action="store_true",
                     help="Print results as CSV")

    opts = cli.parse_args(args[1:])

    if opts.verbose:
        VERBOSE = True

    return (opts.cppfile, opts.buildcmd, opts.execmd, opts.start, opts.end,
            opts.repeat, opts.template, opts.csv)
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def verbose_print(msg, override=None):
###############################################################################
    """
    Print msg when verbose output is wanted.

    override=None follows the module-level VERBOSE flag, a true value always
    prints, and override=False never prints.
    """
    forced_off = override is False
    if (VERBOSE and not forced_off) or override:
        print(msg)
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def error_print(msg):
###############################################################################
    """Write msg, followed by a newline, to standard error."""
    print >>sys.stderr, msg
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def expect(condition, error_msg):
###############################################################################
    """
    Exit with "FAIL: <error_msg>" unless condition holds.

    Intended for reporting user errors: raising SystemExit ends the program
    without the stack trace an assert would print.
    """
    if condition:
        return
    raise SystemExit("FAIL: %s" % error_msg)
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def run_cmd(cmd, ok_to_fail=False, input_str=None, from_dir=None, verbose=None,
            arg_stdout=subprocess.PIPE, arg_stderr=subprocess.PIPE):
###############################################################################
    """
    Run cmd through the shell, optionally from directory from_dir and with
    input_str fed to its stdin.

    With ok_to_fail, returns (exit status, stripped stdout, stderr).
    Otherwise a non-zero exit status ends the program via expect() and the
    stripped stdout is returned on success.
    """
    verbose_print("RUN: %s" % cmd, verbose)

    # A stdin pipe is only needed when there is input to send.
    stdin = subprocess.PIPE if input_str is not None else None

    proc = subprocess.Popen(cmd, shell=True, stdout=arg_stdout,
                            stderr=arg_stderr, stdin=stdin, cwd=from_dir)
    output, errput = proc.communicate(input_str)
    if output is not None:
        output = output.strip()
    stat = proc.wait()

    if ok_to_fail:
        return stat, output, errput

    if arg_stderr is None:
        expect(stat == 0, "Command: '%s' failed. See terminal output" % cmd)
    else:
        if errput is None:
            # stderr went to a file object rather than a pipe; read it back.
            errput = open(arg_stderr.name, "r").read()
        expect(stat == 0, "Command: '%s' failed with error '%s'" % (cmd, errput))

    return output
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def build_and_run(source, cppfile, buildcmd, execmd, repeat):
###############################################################################
    """
    Write the lines in source to cppfile, build with buildcmd, then run
    execmd repeat times and return the largest "FOM: <number>" value seen.

    Returns None when repeat is 0. Exits via expect() if a run prints no line
    of the form "FOM: <number>".
    """
    # Close the file explicitly so its contents are flushed before the build.
    with open(cppfile, 'w') as src_file:
        src_file.writelines(source)

    run_cmd(buildcmd)

    fom_regex = re.compile(r'^FOM: ([0-9.]+)$')

    best = None
    for _ in xrange(repeat):
        wait_for_quiet_machine()
        output = run_cmd(execmd)

        # Only the first matching line of a run counts.
        current = None
        for line in output.splitlines():
            m = fom_regex.match(line)
            if (m is not None):
                current = float(m.groups()[0])
                break

        expect(current is not None, "No lines in output matched FOM regex")

        if (best is None or best < current):
            best = current

    return best
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def wait_for_quiet_machine():
###############################################################################
    """
    Poll `top` every two seconds and return once the reported CPU idle
    percentage is at least 95. Exits via expect() if the idle field is not of
    the form "<number>%id".
    """
    idle_pct_re = re.compile(r'^([0-9.]+)%id$')

    while True:
        time.sleep(2)

        # The first iteration of top gives garbage results, hence -n2 and
        # taking only the last Cpu(s) line.
        idle_pct_raw = run_cmd("top -bn2 | grep 'Cpu(s)' | tr ',' ' ' | tail -n 1 | awk '{print $5}'")

        m = idle_pct_re.match(idle_pct_raw)
        expect(m is not None, "top not returning output in expected form")

        idle_pct = float(m.groups()[0])
        if idle_pct >= 95:
            return

        error_print("Machine is too busy, waiting for it to become free")
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def add_n_dupes(curr_lines, num_dupes, template):
###############################################################################
    """
    Modify curr_lines in place so the tagged function is used num_dupes more
    times.

    The lines between JGF_DUPLICATE_BEGIN and JGF_DUPLICATE_END form the
    function; its name is read from the line right after the BEGIN tag. The
    text after "JGF_DUPE: " is the example invocation. Without template, the
    function text is copied num_dupes times with the name suffixed by the
    copy index; with template, only calls of the form name<index>(...) are
    generated. The calls are inserted after the JGF_DUPE line as an
    else-if chain keyed on std::rand().
    """
    function_name = None
    function_invocation = None
    function_lines = []

    function_re = re.compile(r'^.* (\w+) *[(]')
    function_inv_re = re.compile(r'^.*JGF_DUPE: +(.+)$')

    # Get function lines
    record = False
    definition_insertion_point = None
    invocation_insertion_point = None
    for idx, line in enumerate(curr_lines):
        if ("JGF_DUPLICATE_BEGIN" in line):
            # The function signature must follow the tag; fail cleanly
            # instead of indexing past the end of the file.
            expect(idx + 1 < len(curr_lines),
                   "JGF_DUPLICATE_BEGIN must be followed by the function to duplicate")
            record = True
            m = function_re.match(curr_lines[idx+1])
            expect(m is not None, "Could not find function in line '%s'" % curr_lines[idx+1])
            function_name = m.groups()[0]

        elif ("JGF_DUPLICATE_END" in line):
            record = False
            definition_insertion_point = idx + 1

        elif (record):
            function_lines.append(line)

        elif ("JGF_DUPE" in line):
            m = function_inv_re.match(line)
            expect(m is not None, "Could not find function invocation example in line '%s'" % line)
            function_invocation = m.groups()[0]
            invocation_insertion_point = idx + 1

    expect(function_name is not None, "Could not find name of dupe function")
    expect(function_invocation is not None, "Could not find function invocation point")
    # Without this check a missing END tag leaves the insertion point None,
    # and the slice assignment below would replace the entire file.
    expect(definition_insertion_point is not None, "Could not find JGF_DUPLICATE_END")

    expect(definition_insertion_point < invocation_insertion_point, "fix me")

    dupe_func_defs = []
    dupe_invocations = ["int jgf_rand = std::rand();\n", "if (false) {}\n"]

    for i in xrange(num_dupes):
        if (not template):
            dupe_func = list(function_lines)
            dupe_func[0] = dupe_func[0].replace(function_name, "%s%d" % (function_name, i))
            dupe_func_defs.extend(dupe_func)

        dupe_invocations.append("else if (jgf_rand == %d) " % i)
        if (template):
            dupe_call = function_invocation.replace(function_name, "%s<%d>" % (function_name, i)) + "\n"
        else:
            dupe_call = function_invocation.replace(function_name, "%s%d" % (function_name, i)) + "\n"
        dupe_invocations.append(dupe_call)

    # Insert at the later position first so the earlier index stays valid.
    curr_lines[invocation_insertion_point:invocation_insertion_point] = dupe_invocations
    curr_lines[definition_insertion_point:definition_insertion_point] = dupe_func_defs
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def report(num_dupes, curr_lines, object_file, orig_fom, curr_fom, csv=False, is_first_report=False):
###############################################################################
    """
    Print one result: dupe count, size of object_file in bytes, number of
    source lines, figure of merit, and its percentage change relative to
    orig_fom.

    With csv, a single comma-separated row is printed, preceded by a header
    row when is_first_report is set.
    """
    pct_change = ((curr_fom - orig_fom) / orig_fom) * 100

    if csv:
        if is_first_report:
            print("num_dupes, obj_byte_size, loc, fom, pct_diff")

        row = (num_dupes, os.path.getsize(object_file), len(curr_lines), curr_fom, pct_change)
        print("%s, %s, %s, %s, %s" % row)
        return

    print("========================================================")
    print("For number of dupes: %s" % (num_dupes,))
    print("Object file size (bytes): %s" % (os.path.getsize(object_file),))
    print("Lines of code: %s" % (len(curr_lines),))
    print("Field of merit: %s" % (curr_fom,))
    print("Change pct: %s" % (pct_change,))
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv=False):
###############################################################################
    """
    Measure how the figure of merit changes as the tagged function in cppfile
    is duplicated.

    The unmodified file is built and run first as the baseline. Then, for
    dupe counts start, 2*start, 4*start, ... below end, a modified copy is
    built, run and reported. The original file is kept as <cppfile>.orig
    while the runs are in progress and is put back afterwards, including when
    a build or run fails.
    """
    # The dupe count grows by doubling, so a non-positive start never ends.
    expect(start > 0, "Starting number of dupes must be positive")

    with open(cppfile, 'r') as src_file:
        orig_source_lines = src_file.readlines()

    backup_file = "%s.orig" % cppfile
    object_file = "%s.o" % os.path.splitext(cppfile)[0]
    os.rename(cppfile, backup_file)

    try:
        orig_fom = build_and_run(orig_source_lines, cppfile, buildcmd, execmd, repeat)
        report(0, orig_source_lines, object_file, orig_fom, orig_fom, csv=csv, is_first_report=True)

        i = start
        while (i < end):
            curr_lines = list(orig_source_lines)
            add_n_dupes(curr_lines, i, template)

            curr_fom = build_and_run(curr_lines, cppfile, buildcmd, execmd, repeat)

            report(i, curr_lines, object_file, orig_fom, curr_fom, csv=csv)

            i *= 2 # make growth function configurable?
    finally:
        # Always restore the user's source, even after a failed build or run.
        if os.path.exists(cppfile):
            os.remove(cppfile)
        os.rename(backup_file, cppfile)
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
def _main_func(description):
###############################################################################
    """
    Program entry point. With --test anywhere on the command line, run the
    module's doctests and exit with status 1 if any failed, 0 otherwise.
    Otherwise parse the command line and run the object-size check.
    """
    if "--test" in sys.argv:
        test_results = doctest.testmod(verbose=True)
        exit_code = 1 if test_results.failed > 0 else 0
        sys.exit(exit_code)

    settings = parse_command_line(sys.argv, description)

    # settings is ordered exactly like obj_size_opt_check's parameters.
    obj_size_opt_check(*settings)
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# Run only when executed as a script; the module docstring doubles as the
# command-line description.
if __name__ == "__main__":
    _main_func(__doc__)
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
|
|
||||||
|
|
||||||
# Declare the Containers subpackage, add its sources and its two test
# directories, then finish the subpackage.
tribits_subpackage(Containers)

add_subdirectory(src)

tribits_add_test_directories(unit_tests)
tribits_add_test_directories(performance_tests)

tribits_subpackage_postprocess()
|
|
||||||
@ -1,5 +0,0 @@
|
|||||||
# The library requires KokkosCore and can optionally use the Pthread, CUDA
# and HWLOC TPLs; the tests can optionally use CUSPARSE.
tribits_package_define_dependencies(
  LIB_REQUIRED_PACKAGES KokkosCore
  LIB_OPTIONAL_TPLS     Pthread CUDA HWLOC
  TEST_OPTIONAL_TPLS    CUSPARSE
)
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
#ifndef KOKKOS_CONTAINERS_CONFIG_H
|
|
||||||
#define KOKKOS_CONTAINERS_CONFIG_H
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@ -1,26 +0,0 @@
|
|||||||
|
|
||||||
# Headers come from the build directory, this directory and the container
# sources next to it.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../src
)

# TestMain.cpp and TestCuda.cpp are always built; the Threads and OpenMP
# tests are added only when the matching backend is enabled.
set(SOURCES
  TestMain.cpp
  TestCuda.cpp
)

if(Kokkos_ENABLE_Pthread)
  list(APPEND SOURCES TestThreads.cpp)
endif()

if(Kokkos_ENABLE_OpenMP)
  list(APPEND SOURCES TestOpenMP.cpp)
endif()

tribits_add_executable_and_test(
  PerformanceTest
  SOURCES ${SOURCES}
  COMM serial mpi
  NUM_MPI_PROCS 1
  FAIL_REGULAR_EXPRESSION " FAILED "
  TESTONLYLIBS kokkos_gtest
)
|
|
||||||
@ -1,81 +0,0 @@
|
|||||||
# Builds and runs the Kokkos containers performance tests, one executable per
# enabled backend (Cuda, Threads, OpenMP).

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests

# Declared before the include so that it is the first target in the file.
default: build_all
	echo "End Build"


include $(KOKKOS_PATH)/Makefile.kokkos

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
	CXX = $(NVCC_WRAPPER)
	CXXFLAGS ?= -O3
	LINK = $(CXX)
	LDFLAGS ?= -lpthread
else
	CXX ?= g++
	CXXFLAGS ?= -O3
	LINK ?= $(CXX)
	LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests

TEST_TARGETS =
TARGETS =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
	OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o
	TARGETS += KokkosContainers_PerformanceTest_Cuda
	TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
	OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
	TARGETS += KokkosContainers_PerformanceTest_Threads
	TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
	OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o
	TARGETS += KokkosContainers_PerformanceTest_OpenMP
	TEST_TARGETS += test-openmp
endif

KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda

KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads

KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP

test-cuda: KokkosContainers_PerformanceTest_Cuda
	./KokkosContainers_PerformanceTest_Cuda

test-threads: KokkosContainers_PerformanceTest_Threads
	./KokkosContainers_PerformanceTest_Threads

test-openmp: KokkosContainers_PerformanceTest_OpenMP
	./KokkosContainers_PerformanceTest_OpenMP


build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# These targets name actions, not files; a file called e.g. "test" or "clean"
# in this directory must not stop them from running.
.PHONY: default build_all test clean test-cuda test-threads test-openmp

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
|
||||||
|
|
||||||
@ -1,100 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <string>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <sstream>
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#if defined( KOKKOS_HAVE_CUDA )
|
|
||||||
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
|
|
||||||
#include <TestGlobal2LocalIds.hpp>
|
|
||||||
|
|
||||||
#include <TestUnorderedMapPerformance.hpp>
|
|
||||||
|
|
||||||
namespace Performance {
|
|
||||||
|
|
||||||
class cuda : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
Kokkos::HostSpace::execution_space::initialize();
|
|
||||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
|
||||||
}
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::Cuda::finalize();
|
|
||||||
Kokkos::HostSpace::execution_space::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_F( cuda, global_2_local)
|
|
||||||
{
|
|
||||||
std::cout << "Cuda" << std::endl;
|
|
||||||
std::cout << "size, create, generate, fill, find" << std::endl;
|
|
||||||
for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step)
|
|
||||||
test_global_to_local_ids<Kokkos::Cuda>(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F( cuda, unordered_map_performance_near)
|
|
||||||
{
|
|
||||||
Perf::run_performance_tests<Kokkos::Cuda,true>("cuda-near");
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F( cuda, unordered_map_performance_far)
|
|
||||||
{
|
|
||||||
Perf::run_performance_tests<Kokkos::Cuda,false>("cuda-far");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
|
||||||
@ -1,231 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
|
|
||||||
#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
#include <vector>
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
|
|
||||||
// This test will simulate global ids
|
|
||||||
|
|
||||||
namespace Performance {
|
|
||||||
|
|
||||||
// Bounds of the problem-size sweep used by the global-to-local id tests:
// smallest id count, largest id count, and the multiplier applied between runs.
static const unsigned begin_id_size = 1u << 8;   // 256 ids
static const unsigned end_id_size   = 1u << 22;  // 4194304 ids
static const unsigned id_step       = 2u;        // size is multiplied by this each run
|
|
||||||
|
|
||||||
// Overlays a 32-bit word with its four individual bytes so that the bytes of
// a value can be rearranged in place.
union helper
{
  uint32_t word;      // whole-value view; first member, so brace-init sets it
  uint8_t  byte[4];   // per-byte view of the same storage
};
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
struct generate_ids
|
|
||||||
{
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef typename execution_space::size_type size_type;
|
|
||||||
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
|
|
||||||
|
|
||||||
local_id_view local_2_global;
|
|
||||||
|
|
||||||
generate_ids( local_id_view & ids)
|
|
||||||
: local_2_global(ids)
|
|
||||||
{
|
|
||||||
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(size_type i) const
|
|
||||||
{
|
|
||||||
|
|
||||||
helper x = {static_cast<uint32_t>(i)};
|
|
||||||
|
|
||||||
// shuffle the bytes of i to create a unique, semi-random global_id
|
|
||||||
x.word = ~x.word;
|
|
||||||
|
|
||||||
uint8_t tmp = x.byte[3];
|
|
||||||
x.byte[3] = x.byte[1];
|
|
||||||
x.byte[1] = tmp;
|
|
||||||
|
|
||||||
tmp = x.byte[2];
|
|
||||||
x.byte[2] = x.byte[0];
|
|
||||||
x.byte[0] = tmp;
|
|
||||||
|
|
||||||
local_2_global[i] = x.word;
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
struct fill_map
|
|
||||||
{
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef typename execution_space::size_type size_type;
|
|
||||||
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
|
|
||||||
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
|
|
||||||
|
|
||||||
global_id_view global_2_local;
|
|
||||||
local_id_view local_2_global;
|
|
||||||
|
|
||||||
fill_map( global_id_view gIds, local_id_view lIds)
|
|
||||||
: global_2_local(gIds) , local_2_global(lIds)
|
|
||||||
{
|
|
||||||
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(size_type i) const
|
|
||||||
{
|
|
||||||
global_2_local.insert( local_2_global[i], i);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
struct find_test
|
|
||||||
{
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef typename execution_space::size_type size_type;
|
|
||||||
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
|
|
||||||
typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view;
|
|
||||||
|
|
||||||
global_id_view global_2_local;
|
|
||||||
local_id_view local_2_global;
|
|
||||||
|
|
||||||
typedef size_t value_type;
|
|
||||||
|
|
||||||
find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
|
|
||||||
: global_2_local(gIds) , local_2_global(lIds)
|
|
||||||
{
|
|
||||||
Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void init(value_type & v) const
|
|
||||||
{ v = 0; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void join(volatile value_type & dst, volatile value_type const & src) const
|
|
||||||
{ dst += src; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(size_type i, value_type & num_errors) const
|
|
||||||
{
|
|
||||||
uint32_t index = global_2_local.find( local_2_global[i] );
|
|
||||||
|
|
||||||
if ( global_2_local.value_at(index) != i) ++num_errors;
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
void test_global_to_local_ids(unsigned num_ids)
|
|
||||||
{
|
|
||||||
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef typename execution_space::size_type size_type;
|
|
||||||
|
|
||||||
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
|
|
||||||
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
|
|
||||||
|
|
||||||
//size
|
|
||||||
std::cout << num_ids << ", ";
|
|
||||||
|
|
||||||
double elasped_time = 0;
|
|
||||||
Kokkos::Impl::Timer timer;
|
|
||||||
|
|
||||||
local_id_view local_2_global("local_ids", num_ids);
|
|
||||||
global_id_view global_2_local((3u*num_ids)/2u);
|
|
||||||
|
|
||||||
//create
|
|
||||||
elasped_time = timer.seconds();
|
|
||||||
std::cout << elasped_time << ", ";
|
|
||||||
timer.reset();
|
|
||||||
|
|
||||||
// generate unique ids
|
|
||||||
{
|
|
||||||
generate_ids<Device> gen(local_2_global);
|
|
||||||
}
|
|
||||||
Device::fence();
|
|
||||||
// generate
|
|
||||||
elasped_time = timer.seconds();
|
|
||||||
std::cout << elasped_time << ", ";
|
|
||||||
timer.reset();
|
|
||||||
|
|
||||||
{
|
|
||||||
fill_map<Device> fill(global_2_local, local_2_global);
|
|
||||||
}
|
|
||||||
Device::fence();
|
|
||||||
|
|
||||||
// fill
|
|
||||||
elasped_time = timer.seconds();
|
|
||||||
std::cout << elasped_time << ", ";
|
|
||||||
timer.reset();
|
|
||||||
|
|
||||||
|
|
||||||
size_t num_errors = 0;
|
|
||||||
for (int i=0; i<100; ++i)
|
|
||||||
{
|
|
||||||
find_test<Device> find(global_2_local, local_2_global,num_errors);
|
|
||||||
}
|
|
||||||
Device::fence();
|
|
||||||
|
|
||||||
// find
|
|
||||||
elasped_time = timer.seconds();
|
|
||||||
std::cout << elasped_time << std::endl;
|
|
||||||
|
|
||||||
ASSERT_EQ( num_errors, 0u);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Performance
|
|
||||||
|
|
||||||
|
|
||||||
#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
|
|
||||||
|
|
||||||
@ -1,50 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
::testing::InitGoogleTest(&argc,argv);
|
|
||||||
return RUN_ALL_TESTS();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,131 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
|
|
||||||
#include <TestGlobal2LocalIds.hpp>
|
|
||||||
#include <TestUnorderedMapPerformance.hpp>
|
|
||||||
|
|
||||||
#include <iomanip>
|
|
||||||
#include <sstream>
|
|
||||||
#include <string>
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
|
|
||||||
namespace Performance {
|
|
||||||
|
|
||||||
class openmp : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
|
|
||||||
unsigned num_threads = 4;
|
|
||||||
|
|
||||||
if (Kokkos::hwloc::available()) {
|
|
||||||
num_threads = Kokkos::hwloc::get_available_numa_count()
|
|
||||||
* Kokkos::hwloc::get_available_cores_per_numa()
|
|
||||||
* Kokkos::hwloc::get_available_threads_per_core()
|
|
||||||
;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "OpenMP: " << num_threads << std::endl;
|
|
||||||
|
|
||||||
Kokkos::OpenMP::initialize( num_threads );
|
|
||||||
|
|
||||||
std::cout << "available threads: " << omp_get_max_threads() << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::OpenMP::finalize();
|
|
||||||
|
|
||||||
omp_set_num_threads(1);
|
|
||||||
|
|
||||||
ASSERT_EQ( 1 , omp_get_max_threads() );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Prints a banner and a column header, then runs the global-to-local id test
// on Kokkos::OpenMP for every size from begin_id_size up to and including
// end_id_size, multiplying the size by id_step after each run.
TEST_F( openmp, global_2_local)
{
  std::cout << "OpenMP" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::OpenMP>(num_ids);
    num_ids *= Performance::id_step;
  }
}
|
|
||||||
|
|
||||||
// Runs the unordered-map performance suite on Kokkos::OpenMP with Near=true,
// using "openmp-<thread count>-near" as the base name for its output files.
TEST_F( openmp, unordered_map_performance_near)
{
  // numa count * cores per numa * threads per core from Kokkos::hwloc,
  // or 4 when hwloc is not available.
  const unsigned num_openmp =
    Kokkos::hwloc::available()
      ? Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core()
      : 4u;

  std::ostringstream base_file_name;
  base_file_name << "openmp-" << num_openmp << "-near";

  const std::string name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::OpenMP,true>(name);
}
|
|
||||||
|
|
||||||
// Runs the unordered-map performance suite on Kokkos::OpenMP with Near=false,
// using "openmp-<thread count>-far" as the base name for its output files.
TEST_F( openmp, unordered_map_performance_far)
{
  // numa count * cores per numa * threads per core from Kokkos::hwloc,
  // or 4 when hwloc is not available.
  const unsigned num_openmp =
    Kokkos::hwloc::available()
      ? Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core()
      : 4u;

  std::ostringstream base_file_name;
  base_file_name << "openmp-" << num_openmp << "-far";

  const std::string name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::OpenMP,false>(name);
}
|
|
||||||
|
|
||||||
} // namespace Performance
|
|
||||||
|
|
||||||
@ -1,126 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
#include <TestGlobal2LocalIds.hpp>
|
|
||||||
#include <TestUnorderedMapPerformance.hpp>
|
|
||||||
|
|
||||||
#include <iomanip>
|
|
||||||
#include <sstream>
|
|
||||||
#include <string>
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
namespace Performance {
|
|
||||||
|
|
||||||
class threads : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
|
|
||||||
unsigned num_threads = 4;
|
|
||||||
|
|
||||||
if (Kokkos::hwloc::available()) {
|
|
||||||
num_threads = Kokkos::hwloc::get_available_numa_count() *
|
|
||||||
Kokkos::hwloc::get_available_cores_per_numa() *
|
|
||||||
Kokkos::hwloc::get_available_threads_per_core();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Threads: " << num_threads << std::endl;
|
|
||||||
|
|
||||||
Kokkos::Threads::initialize( num_threads );
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::Threads::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Prints a banner and a column header, then runs the global-to-local id test
// on Kokkos::Threads for every size from begin_id_size up to and including
// end_id_size, multiplying the size by id_step after each run.
TEST_F( threads, global_2_local)
{
  std::cout << "Threads" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::Threads>(num_ids);
    num_ids *= Performance::id_step;
  }
}
|
|
||||||
|
|
||||||
// Runs the unordered-map performance suite on Kokkos::Threads with Near=true,
// using "threads-<thread count>-near" as the base name for its output files.
TEST_F( threads, unordered_map_performance_near)
{
  // numa count * cores per numa * threads per core from Kokkos::hwloc,
  // or 4 when hwloc is not available.
  const unsigned thread_count =
    Kokkos::hwloc::available()
      ? Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core()
      : 4u;

  std::ostringstream base_file_name;
  base_file_name << "threads-" << thread_count << "-near";

  const std::string name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::Threads,true>(name);
}
|
|
||||||
|
|
||||||
// Runs the unordered-map performance suite on Kokkos::Threads with Near=false,
// using "threads-<thread count>-far" as the base name for its output files.
TEST_F( threads, unordered_map_performance_far)
{
  // numa count * cores per numa * threads per core from Kokkos::hwloc,
  // or 4 when hwloc is not available.
  const unsigned thread_count =
    Kokkos::hwloc::available()
      ? Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core()
      : 4u;

  std::ostringstream base_file_name;
  base_file_name << "threads-" << thread_count << "-far";

  const std::string name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::Threads,false>(name);
}
|
|
||||||
|
|
||||||
} // namespace Performance
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,262 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
|
||||||
#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
|
||||||
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <fstream>
|
|
||||||
#include <string>
|
|
||||||
#include <sstream>
|
|
||||||
|
|
||||||
|
|
||||||
namespace Perf {
|
|
||||||
|
|
||||||
template <typename Device, bool Near>
|
|
||||||
struct UnorderedMapTest
|
|
||||||
{
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
|
|
||||||
typedef typename map_type::histogram_type histogram_type;
|
|
||||||
|
|
||||||
struct value_type {
|
|
||||||
uint32_t failed_count;
|
|
||||||
uint32_t max_list;
|
|
||||||
};
|
|
||||||
|
|
||||||
uint32_t capacity;
|
|
||||||
uint32_t inserts;
|
|
||||||
uint32_t collisions;
|
|
||||||
double seconds;
|
|
||||||
map_type map;
|
|
||||||
histogram_type histogram;
|
|
||||||
|
|
||||||
UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions)
|
|
||||||
: capacity(arg_capacity)
|
|
||||||
, inserts(arg_inserts)
|
|
||||||
, collisions(arg_collisions)
|
|
||||||
, seconds(0)
|
|
||||||
, map(capacity)
|
|
||||||
, histogram(map.get_histogram())
|
|
||||||
{
|
|
||||||
Kokkos::Impl::Timer wall_clock ;
|
|
||||||
wall_clock.reset();
|
|
||||||
|
|
||||||
value_type v = {};
|
|
||||||
int loop_count = 0;
|
|
||||||
do {
|
|
||||||
++loop_count;
|
|
||||||
|
|
||||||
v = value_type();
|
|
||||||
Kokkos::parallel_reduce(inserts, *this, v);
|
|
||||||
|
|
||||||
if (v.failed_count > 0u) {
|
|
||||||
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ;
|
|
||||||
map.rehash( new_capacity );
|
|
||||||
}
|
|
||||||
} while (v.failed_count > 0u);
|
|
||||||
|
|
||||||
seconds = wall_clock.seconds();
|
|
||||||
|
|
||||||
switch (loop_count)
|
|
||||||
{
|
|
||||||
case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break;
|
|
||||||
case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break;
|
|
||||||
default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break;
|
|
||||||
}
|
|
||||||
std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush;
|
|
||||||
|
|
||||||
histogram.calculate();
|
|
||||||
Device::fence();
|
|
||||||
}
|
|
||||||
|
|
||||||
void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out)
|
|
||||||
{
|
|
||||||
metrics_out << map.capacity() << " , ";
|
|
||||||
metrics_out << inserts/collisions << " , ";
|
|
||||||
metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , ";
|
|
||||||
metrics_out << inserts << " , ";
|
|
||||||
metrics_out << (map.failed_insert() ? "true" : "false") << " , ";
|
|
||||||
metrics_out << collisions << " , ";
|
|
||||||
metrics_out << 1e9*(seconds/inserts) << " , ";
|
|
||||||
metrics_out << seconds << std::endl;
|
|
||||||
|
|
||||||
length_out << map.capacity() << " , ";
|
|
||||||
length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
|
||||||
length_out << collisions << " , ";
|
|
||||||
histogram.print_length(length_out);
|
|
||||||
|
|
||||||
distance_out << map.capacity() << " , ";
|
|
||||||
distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
|
||||||
distance_out << collisions << " , ";
|
|
||||||
histogram.print_distance(distance_out);
|
|
||||||
|
|
||||||
block_distance_out << map.capacity() << " , ";
|
|
||||||
block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
|
||||||
block_distance_out << collisions << " , ";
|
|
||||||
histogram.print_block_distance(block_distance_out);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void init( value_type & v ) const
|
|
||||||
{
|
|
||||||
v.failed_count = 0;
|
|
||||||
v.max_list = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void join( volatile value_type & dst, const volatile value_type & src ) const
|
|
||||||
{
|
|
||||||
dst.failed_count += src.failed_count;
|
|
||||||
dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(uint32_t i, value_type & v) const
|
|
||||||
{
|
|
||||||
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
|
|
||||||
typename map_type::insert_result result = map.insert(key,i);
|
|
||||||
v.failed_count += !result.failed() ? 0 : 1;
|
|
||||||
v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position();
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
|
|
||||||
|
|
||||||
// Runs the UnorderedMapTest sweep for Device / Near and writes four CSV files
// named <base_file_name>-metrics.csv, -length.csv, -distance.csv and
// -block_distance.csv.
//
// The sweep only exists when KOKKOS_COLLECT_UNORDERED_MAP_METRICS is defined;
// otherwise the function prints "skipping test" and does nothing else.
//
// For each collisions value and each fill ratio, the map capacity is doubled
// from 2^14 up to (but not including) 2^25, and one UnorderedMapTest is run
// and printed per capacity.
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
  std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
  std::string length_file_name = base_file_name + std::string("-length.csv");
  std::string distance_file_name = base_file_name + std::string("-distance.csv");
  std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv");

  std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out );
  std::ofstream length_out( length_file_name.c_str(), std::ofstream::out );
  std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out );
  std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out );

  /*
  const double test_ratios[] = {
     0.50
   , 0.75
   , 0.80
   , 0.85
   , 0.90
   , 0.95
   , 1.00
   , 1.25
   , 2.00
  };
  */

  // Ratios of (unique keys inserted) to (initial capacity) to sweep over.
  const double test_ratios[] = { 1.00 };

  const int num_ratios = sizeof(test_ratios) / sizeof(test_ratios[0]);

  /*
  const uint32_t collisions[] {
      1
    , 4
    , 16
    , 64
  };
  */

  // Values passed as UnorderedMapTest's collisions argument.
  const uint32_t collisions[] = { 16 };

  const int num_collisions = sizeof(collisions) / sizeof(collisions[0]);

  // set up file headers
  metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl;

  // UnorderedMapTest::print() writes capacity, percent full AND the
  // collisions value in front of every histogram row, so the header must
  // name all three leading columns to stay aligned with the data.
  length_out << "Capacity , Percent Full , Collision Ratio , ";
  distance_out << "Capacity , Percent Full , Collision Ratio , ";
  block_distance_out << "Capacity , Percent Full , Collision Ratio , ";

  // Column labels 0..99 for the histogram part of each row.
  for (int i=0; i<100; ++i) {
    length_out << i << " , ";
    distance_out << i << " , ";
    block_distance_out << i << " , ";
  }

  length_out << "\b\b\b   " << std::endl;
  distance_out << "\b\b\b   " << std::endl;
  block_distance_out << "\b\b\b   " << std::endl;

  Kokkos::Impl::Timer wall_clock ;
  for (int i=0;  i < num_collisions ; ++i) {
    wall_clock.reset();
    std::cout << "Collisions: " << collisions[i] << std::endl;
    for (int j = 0; j < num_ratios; ++j) {
      std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "%  " << std::flush;
      for (uint32_t capacity = 1u << 14; capacity < (1u << 25); capacity = capacity << 1) {
        // Unique keys for this run; the attempted inserts are this times
        // the collisions value.
        uint32_t inserts = static_cast<uint32_t>(test_ratios[j]*(capacity));
        std::cout << capacity << std::flush;
        UnorderedMapTest<Device, Near> test(capacity, inserts*collisions[i], collisions[i]);
        Device::fence();
        test.print(metrics_out, length_out, distance_out, block_distance_out);
      }
      std::cout << "\b\b  " <<  std::endl;

    }
    std::cout << "  " << wall_clock.seconds() << " secs" << std::endl;
  }
  metrics_out.close();
  length_out.close();
  distance_out.close();
  block_distance_out.close();
#else
  (void)base_file_name;
  std::cout << "skipping test" << std::endl;
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Perf
|
|
||||||
|
|
||||||
#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
|
|
||||||
# Build script for the kokkoscontainers library (TriBITS-based).

# Process the package's <PACKAGE_NAME>_config.h through the TriBITS
# configure-file macro.
tribits_configure_file(${PACKAGE_NAME}_config.h)

# Make headers in both the build tree and the source tree visible.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

#-----------------------------------------------------------------------------

# Start from empty lists, then collect the files by glob.
set(HEADERS "")
set(SOURCES "")

set(HEADERS_IMPL "")

file(GLOB HEADERS *.hpp)
file(GLOB HEADERS_IMPL impl/*.hpp)
file(GLOB SOURCES impl/*.cpp)

# Install location for headers, under the configured install prefix.
set(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})

# The impl/ headers are installed explicitly into an impl/ subdirectory;
# they are passed as NOINSTALLHEADERS to the library macro below.
install(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)

tribits_add_library(
    kokkoscontainers
    HEADERS ${HEADERS}
    NOINSTALLHEADERS ${HEADERS_IMPL}
    SOURCES ${SOURCES}
    DEPLIBS
    )
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@ -1,437 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_BITSET_HPP
|
|
||||||
#define KOKKOS_BITSET_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
#include <Kokkos_Functional.hpp>
|
|
||||||
|
|
||||||
#include <impl/Kokkos_Bitset_impl.hpp>
|
|
||||||
|
|
||||||
#include <stdexcept>
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
template <typename Device = Kokkos::DefaultExecutionSpace >
|
|
||||||
class Bitset;
|
|
||||||
|
|
||||||
template <typename Device = Kokkos::DefaultExecutionSpace >
|
|
||||||
class ConstBitset;
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
|
||||||
|
|
||||||
|
|
||||||
/// A thread safe view to a bitset
|
|
||||||
template <typename Device>
|
|
||||||
class Bitset
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef unsigned size_type;
|
|
||||||
|
|
||||||
enum { BIT_SCAN_REVERSE = 1u };
|
|
||||||
enum { MOVE_HINT_BACKWARD = 2u };
|
|
||||||
|
|
||||||
enum {
|
|
||||||
BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u
|
|
||||||
, BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE
|
|
||||||
, BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD
|
|
||||||
, BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
|
|
||||||
};
|
|
||||||
|
|
||||||
private:
|
|
||||||
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
|
|
||||||
enum { block_mask = block_size-1u };
|
|
||||||
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
|
|
||||||
/// constructor
|
|
||||||
/// arg_size := number of bit in set
|
|
||||||
Bitset(unsigned arg_size = 0u)
|
|
||||||
: m_size(arg_size)
|
|
||||||
, m_last_block_mask(0u)
|
|
||||||
, m_blocks("Bitset", ((m_size + block_mask) >> block_shift) )
|
|
||||||
{
|
|
||||||
for (int i=0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
|
|
||||||
m_last_block_mask |= 1u << i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// assignment
|
|
||||||
Bitset<Device> & operator = (Bitset<Device> const & rhs)
|
|
||||||
{
|
|
||||||
this->m_size = rhs.m_size;
|
|
||||||
this->m_last_block_mask = rhs.m_last_block_mask;
|
|
||||||
this->m_blocks = rhs.m_blocks;
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// copy constructor
|
|
||||||
Bitset( Bitset<Device> const & rhs)
|
|
||||||
: m_size( rhs.m_size )
|
|
||||||
, m_last_block_mask( rhs.m_last_block_mask )
|
|
||||||
, m_blocks( rhs.m_blocks )
|
|
||||||
{}
|
|
||||||
|
|
||||||
/// number of bits in the set
|
|
||||||
/// can be call from the host or the device
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
unsigned size() const
|
|
||||||
{ return m_size; }
|
|
||||||
|
|
||||||
/// number of bits which are set to 1
|
|
||||||
/// can only be called from the host
|
|
||||||
unsigned count() const
|
|
||||||
{
|
|
||||||
Impl::BitsetCount< Bitset<Device> > f(*this);
|
|
||||||
return f.apply();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// set all bits to 1
|
|
||||||
/// can only be called from the host
|
|
||||||
void set()
|
|
||||||
{
|
|
||||||
Kokkos::deep_copy(m_blocks, ~0u );
|
|
||||||
|
|
||||||
if (m_last_block_mask) {
|
|
||||||
//clear the unused bits in the last block
|
|
||||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
|
||||||
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// set all bits to 0
|
|
||||||
/// can only be called from the host
|
|
||||||
void reset()
|
|
||||||
{
|
|
||||||
Kokkos::deep_copy(m_blocks, 0u );
|
|
||||||
}
|
|
||||||
|
|
||||||
/// set all bits to 0
|
|
||||||
/// can only be called from the host
|
|
||||||
void clear()
|
|
||||||
{
|
|
||||||
Kokkos::deep_copy(m_blocks, 0u );
|
|
||||||
}
|
|
||||||
|
|
||||||
/// set i'th bit to 1
|
|
||||||
/// can only be called from the device
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool set( unsigned i ) const
|
|
||||||
{
|
|
||||||
if ( i < m_size ) {
|
|
||||||
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
|
|
||||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
|
||||||
|
|
||||||
return !( atomic_fetch_or( block_ptr, mask ) & mask );
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// set i'th bit to 0
|
|
||||||
/// can only be called from the device
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool reset( unsigned i ) const
|
|
||||||
{
|
|
||||||
if ( i < m_size ) {
|
|
||||||
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
|
|
||||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
|
||||||
|
|
||||||
return atomic_fetch_and( block_ptr, ~mask ) & mask;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// return true if the i'th bit set to 1
|
|
||||||
/// can only be called from the device
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool test( unsigned i ) const
|
|
||||||
{
|
|
||||||
if ( i < m_size ) {
|
|
||||||
const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]);
|
|
||||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
|
||||||
return block & mask;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// used with find_any_set_near or find_any_unset_near functions
|
|
||||||
/// returns the max number of times those functions should be call
|
|
||||||
/// when searching for an available bit
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
unsigned max_hint() const
|
|
||||||
{
|
|
||||||
return m_blocks.dimension_0();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// find a bit set to 1 near the hint
|
|
||||||
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
|
|
||||||
/// and if result.first is false the result.second is a new hint
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
|
|
||||||
{
|
|
||||||
const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
|
|
||||||
const unsigned offset = hint & block_mask;
|
|
||||||
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
|
||||||
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
|
|
||||||
|
|
||||||
return find_any_helper(block_idx, offset, block, scan_direction);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// find a bit set to 0 near the hint
|
|
||||||
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
|
|
||||||
/// and if result.first is false the result.second is a new hint
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
|
|
||||||
{
|
|
||||||
const unsigned block_idx = hint >> block_shift;
|
|
||||||
const unsigned offset = hint & block_mask;
|
|
||||||
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
|
||||||
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
|
|
||||||
|
|
||||||
return find_any_helper(block_idx, offset, block, scan_direction);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const
|
|
||||||
{
|
|
||||||
Kokkos::pair<bool, unsigned> result( block > 0u, 0);
|
|
||||||
|
|
||||||
if (!result.first) {
|
|
||||||
result.second = update_hint( block_idx, offset, scan_direction );
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
result.second = scan_block( (block_idx << block_shift)
|
|
||||||
, offset
|
|
||||||
, block
|
|
||||||
, scan_direction
|
|
||||||
);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const
|
|
||||||
{
|
|
||||||
offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask;
|
|
||||||
block = Impl::rotate_right(block, offset);
|
|
||||||
return ((( !(scan_direction & BIT_SCAN_REVERSE) ?
|
|
||||||
Impl::bit_scan_forward(block) :
|
|
||||||
Impl::bit_scan_reverse(block)
|
|
||||||
) + offset
|
|
||||||
) & block_mask
|
|
||||||
) + block_start;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
|
|
||||||
{
|
|
||||||
block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
|
|
||||||
block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
|
|
||||||
block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;
|
|
||||||
|
|
||||||
return static_cast<unsigned>(block_idx)*block_size + offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
unsigned m_size;
|
|
||||||
unsigned m_last_block_mask;
|
|
||||||
View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
|
|
||||||
|
|
||||||
private:
|
|
||||||
template <typename DDevice>
|
|
||||||
friend class Bitset;
|
|
||||||
|
|
||||||
template <typename DDevice>
|
|
||||||
friend class ConstBitset;
|
|
||||||
|
|
||||||
template <typename Bitset>
|
|
||||||
friend struct Impl::BitsetCount;
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
|
||||||
};
|
|
||||||
|
|
||||||
/// a thread-safe view to a const bitset
|
|
||||||
/// i.e. can only test bits
|
|
||||||
template <typename Device>
|
|
||||||
class ConstBitset
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef unsigned size_type;
|
|
||||||
|
|
||||||
private:
|
|
||||||
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
|
|
||||||
enum { block_mask = block_size -1u };
|
|
||||||
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
|
|
||||||
|
|
||||||
public:
|
|
||||||
ConstBitset()
|
|
||||||
: m_size (0)
|
|
||||||
{}
|
|
||||||
|
|
||||||
ConstBitset(Bitset<Device> const& rhs)
|
|
||||||
: m_size(rhs.m_size)
|
|
||||||
, m_blocks(rhs.m_blocks)
|
|
||||||
{}
|
|
||||||
|
|
||||||
ConstBitset(ConstBitset<Device> const& rhs)
|
|
||||||
: m_size( rhs.m_size )
|
|
||||||
, m_blocks( rhs.m_blocks )
|
|
||||||
{}
|
|
||||||
|
|
||||||
ConstBitset<Device> & operator = (Bitset<Device> const & rhs)
|
|
||||||
{
|
|
||||||
this->m_size = rhs.m_size;
|
|
||||||
this->m_blocks = rhs.m_blocks;
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs)
|
|
||||||
{
|
|
||||||
this->m_size = rhs.m_size;
|
|
||||||
this->m_blocks = rhs.m_blocks;
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
unsigned size() const
|
|
||||||
{
|
|
||||||
return m_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned count() const
|
|
||||||
{
|
|
||||||
Impl::BitsetCount< ConstBitset<Device> > f(*this);
|
|
||||||
return f.apply();
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool test( unsigned i ) const
|
|
||||||
{
|
|
||||||
if ( i < m_size ) {
|
|
||||||
const unsigned block = m_blocks[ i >> block_shift ];
|
|
||||||
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
|
||||||
return block & mask;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
unsigned m_size;
|
|
||||||
View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
|
|
||||||
|
|
||||||
private:
|
|
||||||
template <typename DDevice>
|
|
||||||
friend class ConstBitset;
|
|
||||||
|
|
||||||
template <typename Bitset>
|
|
||||||
friend struct Impl::BitsetCount;
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
|
|
||||||
{
|
|
||||||
if (dst.size() != src.size()) {
|
|
||||||
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
|
||||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
|
||||||
{
|
|
||||||
if (dst.size() != src.size()) {
|
|
||||||
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
|
||||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
|
||||||
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
|
||||||
{
|
|
||||||
if (dst.size() != src.size()) {
|
|
||||||
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
|
||||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
#endif //KOKKOS_BITSET_HPP
|
|
||||||
@ -1,982 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
/// \file Kokkos_DualView.hpp
|
|
||||||
/// \brief Declaration and definition of Kokkos::DualView.
|
|
||||||
///
|
|
||||||
/// This header file declares and defines Kokkos::DualView and its
|
|
||||||
/// related nonmember functions.
|
|
||||||
|
|
||||||
#ifndef KOKKOS_DUALVIEW_HPP
|
|
||||||
#define KOKKOS_DUALVIEW_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
#include <impl/Kokkos_Error.hpp>
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
/* \class DualView
|
|
||||||
* \brief Container to manage mirroring a Kokkos::View that lives
|
|
||||||
* in device memory with a Kokkos::View that lives in host memory.
|
|
||||||
*
|
|
||||||
* This class provides capabilities to manage data which exists in two
|
|
||||||
* memory spaces at the same time. It keeps views of the same layout
|
|
||||||
* on two memory spaces as well as modified flags for both
|
|
||||||
* allocations. Users are responsible for setting the modified flags
|
|
||||||
* manually if they change the data in either memory space, by calling
|
|
||||||
* the modify() method templated on the device where they modified the
|
|
||||||
* data. Users may synchronize data by calling the sync() function,
|
|
||||||
* templated on the device towards which they want to synchronize
|
|
||||||
* (i.e., the target of the one-way copy operation).
|
|
||||||
*
|
|
||||||
* The DualView class also provides convenience methods such as
|
|
||||||
* realloc, resize and capacity which call the appropriate methods of
|
|
||||||
* the underlying Kokkos::View objects.
|
|
||||||
*
|
|
||||||
* The four template arguments are the same as those of Kokkos::View.
|
|
||||||
* (Please refer to that class' documentation for a detailed
|
|
||||||
* description.)
|
|
||||||
*
|
|
||||||
* \tparam DataType The type of the entries stored in the container.
|
|
||||||
*
|
|
||||||
* \tparam Layout The array's layout in memory.
|
|
||||||
*
|
|
||||||
* \tparam Device The Kokkos Device type. If its memory space is
|
|
||||||
* not the same as the host's memory space, then DualView will
|
|
||||||
* contain two separate Views: one in device memory, and one in
|
|
||||||
* host memory. Otherwise, DualView will only store one View.
|
|
||||||
*
|
|
||||||
* \tparam MemoryTraits (optional) The user's intended memory access
|
|
||||||
* behavior. Please see the documentation of Kokkos::View for
|
|
||||||
* examples. The default suffices for most users.
|
|
||||||
*/
|
|
||||||
template< class DataType ,
|
|
||||||
class Arg1Type = void ,
|
|
||||||
class Arg2Type = void ,
|
|
||||||
class Arg3Type = void>
|
|
||||||
class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
//! \name Typedefs for device types and various Kokkos::View specializations.
|
|
||||||
//@{
|
|
||||||
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
|
|
||||||
|
|
||||||
//! The Kokkos Host Device type;
|
|
||||||
typedef typename traits::host_mirror_space host_mirror_space ;
|
|
||||||
|
|
||||||
//! The type of a Kokkos::View on the device.
|
|
||||||
typedef View< typename traits::data_type ,
|
|
||||||
Arg1Type ,
|
|
||||||
Arg2Type ,
|
|
||||||
Arg3Type > t_dev ;
|
|
||||||
|
|
||||||
/// \typedef t_host
|
|
||||||
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
|
|
||||||
typedef typename t_dev::HostMirror t_host ;
|
|
||||||
|
|
||||||
//! The type of a const View on the device.
|
|
||||||
//! The type of a Kokkos::View on the device.
|
|
||||||
typedef View< typename traits::const_data_type ,
|
|
||||||
Arg1Type ,
|
|
||||||
Arg2Type ,
|
|
||||||
Arg3Type > t_dev_const ;
|
|
||||||
|
|
||||||
/// \typedef t_host_const
|
|
||||||
/// \brief The type of a const View host mirror of \c t_dev_const.
|
|
||||||
typedef typename t_dev_const::HostMirror t_host_const;
|
|
||||||
|
|
||||||
//! The type of a const, random-access View on the device.
|
|
||||||
typedef View< typename traits::const_data_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
typename traits::device_type ,
|
|
||||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_dev_const_randomread ;
|
|
||||||
|
|
||||||
/// \typedef t_host_const_randomread
|
|
||||||
/// \brief The type of a const, random-access View host mirror of
|
|
||||||
/// \c t_dev_const_randomread.
|
|
||||||
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
|
|
||||||
|
|
||||||
//! The type of an unmanaged View on the device.
|
|
||||||
typedef View< typename traits::data_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
typename traits::device_type ,
|
|
||||||
MemoryUnmanaged> t_dev_um;
|
|
||||||
|
|
||||||
//! The type of an unmanaged View host mirror of \c t_dev_um.
|
|
||||||
typedef View< typename t_host::data_type ,
|
|
||||||
typename t_host::array_layout ,
|
|
||||||
typename t_host::device_type ,
|
|
||||||
MemoryUnmanaged> t_host_um;
|
|
||||||
|
|
||||||
//! The type of a const unmanaged View on the device.
|
|
||||||
typedef View< typename traits::const_data_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
typename traits::device_type ,
|
|
||||||
MemoryUnmanaged> t_dev_const_um;
|
|
||||||
|
|
||||||
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
|
|
||||||
typedef View<typename t_host::const_data_type,
|
|
||||||
typename t_host::array_layout,
|
|
||||||
typename t_host::device_type,
|
|
||||||
MemoryUnmanaged> t_host_const_um;
|
|
||||||
|
|
||||||
//! The type of a const, random-access View on the device.
|
|
||||||
typedef View< typename t_host::const_data_type ,
|
|
||||||
typename t_host::array_layout ,
|
|
||||||
typename t_host::device_type ,
|
|
||||||
Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess> > t_dev_const_randomread_um ;
|
|
||||||
|
|
||||||
/// \typedef t_host_const_randomread_um
|
|
||||||
/// \brief The type of a const, random-access View host mirror of
|
|
||||||
/// \c t_dev_const_randomread.
|
|
||||||
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um;
|
|
||||||
|
|
||||||
//@}
|
|
||||||
//! \name The two View instances.
|
|
||||||
//@{
|
|
||||||
|
|
||||||
t_dev d_view;
|
|
||||||
t_host h_view;
|
|
||||||
|
|
||||||
//@}
|
|
||||||
//! \name Counters to keep track of changes ("modified" flags)
|
|
||||||
//@{
|
|
||||||
|
|
||||||
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
|
|
||||||
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
|
|
||||||
|
|
||||||
//@}
|
|
||||||
//! \name Constructors
|
|
||||||
//@{
|
|
||||||
|
|
||||||
/// \brief Empty constructor.
|
|
||||||
///
|
|
||||||
/// Both device and host View objects are constructed using their
|
|
||||||
/// default constructors. The "modified" flags are both initialized
|
|
||||||
/// to "unmodified."
|
|
||||||
DualView () :
|
|
||||||
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
|
|
||||||
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
|
||||||
{}
|
|
||||||
|
|
||||||
/// \brief Constructor that allocates View objects on both host and device.
|
|
||||||
///
|
|
||||||
/// This constructor works like the analogous constructor of View.
|
|
||||||
/// The first argument is a string label, which is entirely for your
|
|
||||||
/// benefit. (Different DualView objects may have the same label if
|
|
||||||
/// you like.) The arguments that follow are the dimensions of the
|
|
||||||
/// View objects. For example, if the View has three dimensions,
|
|
||||||
/// the first three integer arguments will be nonzero, and you may
|
|
||||||
/// omit the integer arguments that follow.
|
|
||||||
DualView (const std::string& label,
|
|
||||||
const size_t n0 = 0,
|
|
||||||
const size_t n1 = 0,
|
|
||||||
const size_t n2 = 0,
|
|
||||||
const size_t n3 = 0,
|
|
||||||
const size_t n4 = 0,
|
|
||||||
const size_t n5 = 0,
|
|
||||||
const size_t n6 = 0,
|
|
||||||
const size_t n7 = 0)
|
|
||||||
: d_view (label, n0, n1, n2, n3, n4, n5, n6, n7)
|
|
||||||
, h_view (create_mirror_view (d_view)) // without UVM, host View mirrors
|
|
||||||
, modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device"))
|
|
||||||
, modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
|
||||||
{}
|
|
||||||
|
|
||||||
//! Copy constructor (shallow copy)
|
|
||||||
template<class SS, class LS, class DS, class MS>
|
|
||||||
DualView (const DualView<SS,LS,DS,MS>& src) :
|
|
||||||
d_view (src.d_view),
|
|
||||||
h_view (src.h_view),
|
|
||||||
modified_device (src.modified_device),
|
|
||||||
modified_host (src.modified_host)
|
|
||||||
{}
|
|
||||||
|
|
||||||
//! Subview constructor
|
|
||||||
template< class SD, class S1 , class S2 , class S3
|
|
||||||
, class Arg0 , class ... Args >
|
|
||||||
DualView( const DualView<SD,S1,S2,S3> & src
|
|
||||||
, const Arg0 & arg0
|
|
||||||
, Args ... args
|
|
||||||
)
|
|
||||||
: d_view( Kokkos::subview( src.d_view , arg0 , args ... ) )
|
|
||||||
, h_view( Kokkos::subview( src.h_view , arg0 , args ... ) )
|
|
||||||
, modified_device (src.modified_device)
|
|
||||||
, modified_host (src.modified_host)
|
|
||||||
{}
|
|
||||||
|
|
||||||
/// \brief Create DualView from existing device and host View objects.
|
|
||||||
///
|
|
||||||
/// This constructor assumes that the device and host View objects
|
|
||||||
/// are synchronized. You, the caller, are responsible for making
|
|
||||||
/// sure this is the case before calling this constructor. After
|
|
||||||
/// this constructor returns, you may use DualView's sync() and
|
|
||||||
/// modify() methods to ensure synchronization of the View objects.
|
|
||||||
///
|
|
||||||
/// \param d_view_ Device View
|
|
||||||
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
|
|
||||||
DualView (const t_dev& d_view_, const t_host& h_view_) :
|
|
||||||
d_view (d_view_),
|
|
||||||
h_view (h_view_),
|
|
||||||
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
|
|
||||||
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
|
||||||
{
|
|
||||||
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
|
||||||
Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
|
|
||||||
#else
|
|
||||||
if ( int(d_view.rank) != int(h_view.rank) ||
|
|
||||||
d_view.dimension_0() != h_view.dimension_0() ||
|
|
||||||
d_view.dimension_1() != h_view.dimension_1() ||
|
|
||||||
d_view.dimension_2() != h_view.dimension_2() ||
|
|
||||||
d_view.dimension_3() != h_view.dimension_3() ||
|
|
||||||
d_view.dimension_4() != h_view.dimension_4() ||
|
|
||||||
d_view.dimension_5() != h_view.dimension_5() ||
|
|
||||||
d_view.dimension_6() != h_view.dimension_6() ||
|
|
||||||
d_view.dimension_7() != h_view.dimension_7() ||
|
|
||||||
d_view.stride_0() != h_view.stride_0() ||
|
|
||||||
d_view.stride_1() != h_view.stride_1() ||
|
|
||||||
d_view.stride_2() != h_view.stride_2() ||
|
|
||||||
d_view.stride_3() != h_view.stride_3() ||
|
|
||||||
d_view.stride_4() != h_view.stride_4() ||
|
|
||||||
d_view.stride_5() != h_view.stride_5() ||
|
|
||||||
d_view.stride_6() != h_view.stride_6() ||
|
|
||||||
d_view.stride_7() != h_view.stride_7() ||
|
|
||||||
d_view.span() != h_view.span() ) {
|
|
||||||
Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
//@}
|
|
||||||
//! \name Methods for synchronizing, marking as modified, and getting Views.
|
|
||||||
//@{
|
|
||||||
|
|
||||||
/// \brief Return a View on a specific device \c Device.
|
|
||||||
///
|
|
||||||
/// Please don't be afraid of the if_c expression in the return
|
|
||||||
/// value's type. That just tells the method what the return type
|
|
||||||
/// should be: t_dev if the \c Device template parameter matches
|
|
||||||
/// this DualView's device type, else t_host.
|
|
||||||
///
|
|
||||||
/// For example, suppose you create a DualView on Cuda, like this:
|
|
||||||
/// \code
|
|
||||||
/// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type;
|
|
||||||
/// dual_view_type DV ("my dual view", 100);
|
|
||||||
/// \endcode
|
|
||||||
/// If you want to get the CUDA device View, do this:
|
|
||||||
/// \code
|
|
||||||
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
|
|
||||||
/// \endcode
|
|
||||||
/// and if you want to get the host mirror of that View, do this:
|
|
||||||
/// \code
|
|
||||||
/// typedef typename Kokkos::HostSpace::execution_space host_device_type;
|
|
||||||
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
|
|
||||||
/// \endcode
|
|
||||||
template< class Device >
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
const typename Impl::if_c<
|
|
||||||
Impl::is_same<typename t_dev::memory_space,
|
|
||||||
typename Device::memory_space>::value,
|
|
||||||
t_dev,
|
|
||||||
t_host>::type& view () const
|
|
||||||
{
|
|
||||||
return Impl::if_c<
|
|
||||||
Impl::is_same<
|
|
||||||
typename t_dev::memory_space,
|
|
||||||
typename Device::memory_space>::value,
|
|
||||||
t_dev,
|
|
||||||
t_host >::select (d_view , h_view);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Update data on device or host only if data in the other
|
|
||||||
/// space has been marked as modified.
|
|
||||||
///
|
|
||||||
/// If \c Device is the same as this DualView's device type, then
|
|
||||||
/// copy data from host to device. Otherwise, copy data from device
|
|
||||||
/// to host. In either case, only copy if the source of the copy
|
|
||||||
/// has been modified.
|
|
||||||
///
|
|
||||||
/// This is a one-way synchronization only. If the target of the
|
|
||||||
/// copy has been modified, this operation will discard those
|
|
||||||
/// modifications. It will also reset both device and host modified
|
|
||||||
/// flags.
|
|
||||||
///
|
|
||||||
/// \note This method doesn't know on its own whether you modified
|
|
||||||
/// the data in either View. You must manually mark modified data
|
|
||||||
/// as modified, by calling the modify() method with the
|
|
||||||
/// appropriate template parameter.
|
|
||||||
template<class Device>
|
|
||||||
void sync( const typename Impl::enable_if<
|
|
||||||
( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
|
|
||||||
( Impl::is_same< Device , int>::value)
|
|
||||||
, int >::type& = 0)
|
|
||||||
{
|
|
||||||
const unsigned int dev =
|
|
||||||
Impl::if_c<
|
|
||||||
Impl::is_same<
|
|
||||||
typename t_dev::memory_space,
|
|
||||||
typename Device::memory_space>::value ,
|
|
||||||
unsigned int,
|
|
||||||
unsigned int>::select (1, 0);
|
|
||||||
|
|
||||||
if (dev) { // if Device is the same as DualView's device type
|
|
||||||
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
|
||||||
deep_copy (d_view, h_view);
|
|
||||||
modified_host() = modified_device() = 0;
|
|
||||||
}
|
|
||||||
} else { // hopefully Device is the same as DualView's host type
|
|
||||||
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
|
||||||
deep_copy (h_view, d_view);
|
|
||||||
modified_host() = modified_device() = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
|
|
||||||
t_dev::execution_space::fence();
|
|
||||||
t_host::execution_space::fence();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Device>
|
|
||||||
void sync ( const typename Impl::enable_if<
|
|
||||||
( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
|
|
||||||
( Impl::is_same< Device , int>::value)
|
|
||||||
, int >::type& = 0 )
|
|
||||||
{
|
|
||||||
const unsigned int dev =
|
|
||||||
Impl::if_c<
|
|
||||||
Impl::is_same<
|
|
||||||
typename t_dev::memory_space,
|
|
||||||
typename Device::memory_space>::value,
|
|
||||||
unsigned int,
|
|
||||||
unsigned int>::select (1, 0);
|
|
||||||
if (dev) { // if Device is the same as DualView's device type
|
|
||||||
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
|
||||||
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
|
|
||||||
}
|
|
||||||
} else { // hopefully Device is the same as DualView's host type
|
|
||||||
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
|
||||||
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Device>
|
|
||||||
bool need_sync()
|
|
||||||
{
|
|
||||||
const unsigned int dev =
|
|
||||||
Impl::if_c<
|
|
||||||
Impl::is_same<
|
|
||||||
typename t_dev::memory_space,
|
|
||||||
typename Device::memory_space>::value ,
|
|
||||||
unsigned int,
|
|
||||||
unsigned int>::select (1, 0);
|
|
||||||
|
|
||||||
if (dev) { // if Device is the same as DualView's device type
|
|
||||||
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else { // hopefully Device is the same as DualView's host type
|
|
||||||
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
/// \brief Mark data as modified on the given device \c Device.
|
|
||||||
///
|
|
||||||
/// If \c Device is the same as this DualView's device type, then
|
|
||||||
/// mark the device's data as modified. Otherwise, mark the host's
|
|
||||||
/// data as modified.
|
|
||||||
template<class Device>
|
|
||||||
void modify () {
|
|
||||||
const unsigned int dev =
|
|
||||||
Impl::if_c<
|
|
||||||
Impl::is_same<
|
|
||||||
typename t_dev::memory_space,
|
|
||||||
typename Device::memory_space>::value,
|
|
||||||
unsigned int,
|
|
||||||
unsigned int>::select (1, 0);
|
|
||||||
|
|
||||||
if (dev) { // if Device is the same as DualView's device type
|
|
||||||
// Increment the device's modified count.
|
|
||||||
modified_device () = (modified_device () > modified_host () ?
|
|
||||||
modified_device () : modified_host ()) + 1;
|
|
||||||
} else { // hopefully Device is the same as DualView's host type
|
|
||||||
// Increment the host's modified count.
|
|
||||||
modified_host () = (modified_device () > modified_host () ?
|
|
||||||
modified_device () : modified_host ()) + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//@}
|
|
||||||
//! \name Methods for reallocating or resizing the View objects.
|
|
||||||
//@{
|
|
||||||
|
|
||||||
/// \brief Reallocate both View objects.
|
|
||||||
///
|
|
||||||
/// This discards any existing contents of the objects, and resets
|
|
||||||
/// their modified flags. It does <i>not</i> copy the old contents
|
|
||||||
/// of either View into the new View objects.
|
|
||||||
void realloc( const size_t n0 = 0 ,
|
|
||||||
const size_t n1 = 0 ,
|
|
||||||
const size_t n2 = 0 ,
|
|
||||||
const size_t n3 = 0 ,
|
|
||||||
const size_t n4 = 0 ,
|
|
||||||
const size_t n5 = 0 ,
|
|
||||||
const size_t n6 = 0 ,
|
|
||||||
const size_t n7 = 0 ) {
|
|
||||||
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
|
||||||
h_view = create_mirror_view( d_view );
|
|
||||||
|
|
||||||
/* Reset dirty flags */
|
|
||||||
modified_device() = modified_host() = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Resize both views, copying old contents into new if necessary.
|
|
||||||
///
|
|
||||||
/// This method only copies the old contents into the new View
|
|
||||||
/// objects for the device which was last marked as modified.
|
|
||||||
void resize( const size_t n0 = 0 ,
|
|
||||||
const size_t n1 = 0 ,
|
|
||||||
const size_t n2 = 0 ,
|
|
||||||
const size_t n3 = 0 ,
|
|
||||||
const size_t n4 = 0 ,
|
|
||||||
const size_t n5 = 0 ,
|
|
||||||
const size_t n6 = 0 ,
|
|
||||||
const size_t n7 = 0 ) {
|
|
||||||
if(modified_device() >= modified_host()) {
|
|
||||||
/* Resize on Device */
|
|
||||||
::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
|
||||||
h_view = create_mirror_view( d_view );
|
|
||||||
|
|
||||||
/* Mark Device copy as modified */
|
|
||||||
modified_device() = modified_device()+1;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
/* Realloc on Device */
|
|
||||||
|
|
||||||
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
|
||||||
t_host temp_view = create_mirror_view( d_view );
|
|
||||||
|
|
||||||
/* Remap on Host */
|
|
||||||
Kokkos::deep_copy( temp_view , h_view );
|
|
||||||
|
|
||||||
h_view = temp_view;
|
|
||||||
|
|
||||||
/* Mark Host copy as modified */
|
|
||||||
modified_host() = modified_host()+1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//@}
|
|
||||||
//! \name Methods for getting capacity, stride, or dimension(s).
|
|
||||||
//@{
|
|
||||||
|
|
||||||
//! The allocation size (same as Kokkos::View::capacity).
|
|
||||||
size_t capacity() const {
|
|
||||||
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
|
||||||
return d_view.span();
|
|
||||||
#else
|
|
||||||
return d_view.capacity();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Get stride(s) for each dimension.
|
|
||||||
template< typename iType>
|
|
||||||
void stride(iType* stride_) const {
|
|
||||||
d_view.stride(stride_);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* \brief return size of dimension 0 */
|
|
||||||
size_t dimension_0() const {return d_view.dimension_0();}
|
|
||||||
/* \brief return size of dimension 1 */
|
|
||||||
size_t dimension_1() const {return d_view.dimension_1();}
|
|
||||||
/* \brief return size of dimension 2 */
|
|
||||||
size_t dimension_2() const {return d_view.dimension_2();}
|
|
||||||
/* \brief return size of dimension 3 */
|
|
||||||
size_t dimension_3() const {return d_view.dimension_3();}
|
|
||||||
/* \brief return size of dimension 4 */
|
|
||||||
size_t dimension_4() const {return d_view.dimension_4();}
|
|
||||||
/* \brief return size of dimension 5 */
|
|
||||||
size_t dimension_5() const {return d_view.dimension_5();}
|
|
||||||
/* \brief return size of dimension 6 */
|
|
||||||
size_t dimension_6() const {return d_view.dimension_6();}
|
|
||||||
/* \brief return size of dimension 7 */
|
|
||||||
size_t dimension_7() const {return d_view.dimension_7();}
|
|
||||||
|
|
||||||
//@}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Partial specializations of Kokkos::subview() for DualView objects.
|
|
||||||
//
|
|
||||||
|
|
||||||
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template< class D, class A1, class A2, class A3, class ... Args >
|
|
||||||
struct DualViewSubview {
|
|
||||||
|
|
||||||
typedef typename Kokkos::Experimental::Impl::ViewMapping
|
|
||||||
< void
|
|
||||||
, Kokkos::ViewTraits< D, A1, A2, A3 >
|
|
||||||
, Args ...
|
|
||||||
>::traits_type dst_traits ;
|
|
||||||
|
|
||||||
typedef Kokkos::DualView
|
|
||||||
< typename dst_traits::data_type
|
|
||||||
, typename dst_traits::array_layout
|
|
||||||
, typename dst_traits::device_type
|
|
||||||
, typename dst_traits::memory_traits
|
|
||||||
> type ;
|
|
||||||
};
|
|
||||||
|
|
||||||
} /* namespace Impl */
|
|
||||||
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 , class ... Args >
|
|
||||||
typename Impl::DualViewSubview<D,A1,A2,A3,Args...>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src , Args ... args )
|
|
||||||
{
|
|
||||||
return typename
|
|
||||||
Impl::DualViewSubview<D,A1,A2,A3,Args...>::type( src , args ... );
|
|
||||||
}
|
|
||||||
|
|
||||||
} /* namespace Kokkos */
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Partial specializations of Kokkos::subview() for DualView objects.
|
|
||||||
//
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
|
||||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
|
||||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
|
||||||
>
|
|
||||||
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
|
||||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
|
||||||
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
|
|
||||||
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
|
|
||||||
|
|
||||||
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
|
|
||||||
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
|
|
||||||
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
|
|
||||||
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
|
|
||||||
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
|
|
||||||
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
|
|
||||||
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
|
|
||||||
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
|
|
||||||
|
|
||||||
// The source view rank must be equal to the input argument rank
|
|
||||||
// Once a void argument is encountered all subsequent arguments must be void.
|
|
||||||
enum { InputRank =
|
|
||||||
Impl::StaticAssert<( SrcViewType::rank ==
|
|
||||||
( V0 ? 0 : (
|
|
||||||
V1 ? 1 : (
|
|
||||||
V2 ? 2 : (
|
|
||||||
V3 ? 3 : (
|
|
||||||
V4 ? 4 : (
|
|
||||||
V5 ? 5 : (
|
|
||||||
V6 ? 6 : (
|
|
||||||
V7 ? 7 : 8 ))))))) ))
|
|
||||||
&&
|
|
||||||
( SrcViewType::rank ==
|
|
||||||
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
|
|
||||||
>::value ? SrcViewType::rank : 0 };
|
|
||||||
|
|
||||||
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
|
|
||||||
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
|
|
||||||
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
|
|
||||||
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
|
|
||||||
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
|
|
||||||
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
|
|
||||||
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
|
|
||||||
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
|
|
||||||
|
|
||||||
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
|
|
||||||
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
|
|
||||||
|
|
||||||
// Reverse
|
|
||||||
enum { R0_rev = 0 == InputRank ? 0u : (
|
|
||||||
1 == InputRank ? unsigned(R0) : (
|
|
||||||
2 == InputRank ? unsigned(R1) : (
|
|
||||||
3 == InputRank ? unsigned(R2) : (
|
|
||||||
4 == InputRank ? unsigned(R3) : (
|
|
||||||
5 == InputRank ? unsigned(R4) : (
|
|
||||||
6 == InputRank ? unsigned(R5) : (
|
|
||||||
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
|
|
||||||
|
|
||||||
typedef typename SrcViewType::array_layout SrcViewLayout ;
|
|
||||||
|
|
||||||
// Choose array layout, attempting to preserve original layout if at all possible.
|
|
||||||
typedef typename Impl::if_c<
|
|
||||||
( // Same Layout IF
|
|
||||||
// OutputRank 0
|
|
||||||
( OutputRank == 0 )
|
|
||||||
||
|
|
||||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
|
||||||
// because single stride one or second index has a stride.
|
|
||||||
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
|
|
||||||
||
|
|
||||||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
|
|
||||||
// because single stride one or second index has a stride.
|
|
||||||
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
|
|
||||||
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
|
|
||||||
|
|
||||||
// Choose data type as a purely dynamic rank array to accomodate a runtime range.
|
|
||||||
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
|
|
||||||
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
|
|
||||||
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
|
|
||||||
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
|
|
||||||
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
|
|
||||||
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
|
|
||||||
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
|
|
||||||
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
|
|
||||||
typename SrcViewType::value_type ********
|
|
||||||
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
|
|
||||||
|
|
||||||
// Choose space.
|
|
||||||
// If the source view's template arg1 or arg2 is a space then use it,
|
|
||||||
// otherwise use the source view's execution space.
|
|
||||||
|
|
||||||
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
|
|
||||||
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
|
|
||||||
>::type >::type OutputSpace ;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
// If keeping the layout then match non-data type arguments
|
|
||||||
// else keep execution space and memory traits.
|
|
||||||
typedef typename
|
|
||||||
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
|
|
||||||
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
|
||||||
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
|
|
||||||
, typename SrcViewType::memory_traits >
|
|
||||||
>::type type ;
|
|
||||||
};
|
|
||||||
|
|
||||||
} /* namespace Impl */
|
|
||||||
} /* namespace Kokkos */
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , void , void , void
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , void , void , void
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 , class ArgType1 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , void , void
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 ,
|
|
||||||
const ArgType1 & arg1 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , void , void
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0,arg1);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0,arg1);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 , class ArgType1 , class ArgType2 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , void
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 ,
|
|
||||||
const ArgType1 & arg1 ,
|
|
||||||
const ArgType2 & arg2 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , void
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 ,
|
|
||||||
const ArgType1 & arg1 ,
|
|
||||||
const ArgType2 & arg2 ,
|
|
||||||
const ArgType3 & arg3 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, void , void , void , void
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
|
||||||
class ArgType4 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , void , void , void
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 ,
|
|
||||||
const ArgType1 & arg1 ,
|
|
||||||
const ArgType2 & arg2 ,
|
|
||||||
const ArgType3 & arg3 ,
|
|
||||||
const ArgType4 & arg4 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , void , void ,void
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
|
||||||
class ArgType4 , class ArgType5 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , ArgType5 , void , void
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 ,
|
|
||||||
const ArgType1 & arg1 ,
|
|
||||||
const ArgType2 & arg2 ,
|
|
||||||
const ArgType3 & arg3 ,
|
|
||||||
const ArgType4 & arg4 ,
|
|
||||||
const ArgType5 & arg5 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , ArgType5 , void , void
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
|
||||||
class ArgType4 , class ArgType5 , class ArgType6 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , ArgType5 , ArgType6 , void
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 ,
|
|
||||||
const ArgType1 & arg1 ,
|
|
||||||
const ArgType2 & arg2 ,
|
|
||||||
const ArgType3 & arg3 ,
|
|
||||||
const ArgType4 & arg4 ,
|
|
||||||
const ArgType5 & arg5 ,
|
|
||||||
const ArgType6 & arg6 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , ArgType5 , ArgType6 , void
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class D , class A1 , class A2 , class A3 ,
|
|
||||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
|
||||||
class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
|
|
||||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
|
||||||
>::type
|
|
||||||
subview( const DualView<D,A1,A2,A3> & src ,
|
|
||||||
const ArgType0 & arg0 ,
|
|
||||||
const ArgType1 & arg1 ,
|
|
||||||
const ArgType2 & arg2 ,
|
|
||||||
const ArgType3 & arg3 ,
|
|
||||||
const ArgType4 & arg4 ,
|
|
||||||
const ArgType5 & arg5 ,
|
|
||||||
const ArgType6 & arg6 ,
|
|
||||||
const ArgType7 & arg7 )
|
|
||||||
{
|
|
||||||
typedef typename
|
|
||||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
|
||||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
|
||||||
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
|
||||||
>::type
|
|
||||||
DstViewType ;
|
|
||||||
DstViewType sub_view;
|
|
||||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
|
||||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
|
||||||
sub_view.modified_device = src.modified_device;
|
|
||||||
sub_view.modified_host = src.modified_host;
|
|
||||||
return sub_view;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
#endif /* defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
//
|
|
||||||
// Overloads of Kokkos::deep_copy() for DualView objects.
|
|
||||||
//
|
|
||||||
|
|
||||||
template< class DT , class DL , class DD , class DM ,
|
|
||||||
class ST , class SL , class SD , class SM >
|
|
||||||
void
|
|
||||||
deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
|
|
||||||
const DualView<ST,SL,SD,SM>& src )
|
|
||||||
{
|
|
||||||
if (src.modified_device () >= src.modified_host ()) {
|
|
||||||
deep_copy (dst.d_view, src.d_view);
|
|
||||||
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
|
|
||||||
} else {
|
|
||||||
deep_copy (dst.h_view, src.h_view);
|
|
||||||
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class ExecutionSpace ,
|
|
||||||
class DT , class DL , class DD , class DM ,
|
|
||||||
class ST , class SL , class SD , class SM >
|
|
||||||
void
|
|
||||||
deep_copy (const ExecutionSpace& exec ,
|
|
||||||
DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
|
|
||||||
const DualView<ST,SL,SD,SM>& src )
|
|
||||||
{
|
|
||||||
if (src.modified_device () >= src.modified_host ()) {
|
|
||||||
deep_copy (exec, dst.d_view, src.d_view);
|
|
||||||
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
|
|
||||||
} else {
|
|
||||||
deep_copy (exec, dst.h_view, src.h_view);
|
|
||||||
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@ -1,173 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_FUNCTIONAL_HPP
|
|
||||||
#define KOKKOS_FUNCTIONAL_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Macros.hpp>
|
|
||||||
#include <impl/Kokkos_Functional_impl.hpp>
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
// These should work for most types
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct pod_hash
|
|
||||||
{
|
|
||||||
typedef T argument_type;
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef uint32_t second_argument_type;
|
|
||||||
typedef uint32_t result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
uint32_t operator()(T const & t) const
|
|
||||||
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), 0); }
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
uint32_t operator()(T const & t, uint32_t seed) const
|
|
||||||
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), seed); }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct pod_equal_to
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return Impl::bitwise_equal(&a,&b); }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct pod_not_equal_to
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return !Impl::bitwise_equal(&a,&b); }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct equal_to
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return a == b; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct not_equal_to
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return a != b; }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct greater
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return a > b; }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct less
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return a < b; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct greater_equal
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return a >= b; }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct less_equal
|
|
||||||
{
|
|
||||||
typedef T first_argument_type;
|
|
||||||
typedef T second_argument_type;
|
|
||||||
typedef bool result_type;
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool operator()(T const & a, T const & b) const
|
|
||||||
{ return a <= b; }
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
|
|
||||||
#endif //KOKKOS_FUNCTIONAL_HPP
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,531 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
|
|
||||||
#define KOKKOS_SEGMENTED_VIEW_HPP_
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
#include <impl/Kokkos_Error.hpp>
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
namespace Experimental {
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
// Forward declaration only; the functor is defined after SegmentedView.
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
struct delete_segmented_view;

/// \brief Hook for reserving allocatable device memory for a memory space.
///
/// The primary template does nothing and ignores its argument; memory spaces
/// that need an adjustment supply an explicit specialization.
template<class MemorySpace>
inline void DeviceSetAllocatableMemorySize(size_t /* size */) {}
|
|
||||||
|
|
||||||
#if defined( KOKKOS_HAVE_CUDA )
|
|
||||||
|
|
||||||
template<>
|
|
||||||
inline
|
|
||||||
void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
size_t size_limit;
|
|
||||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
|
||||||
if(size_limit<size)
|
|
||||||
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
|
|
||||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
|
||||||
inline
|
|
||||||
void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
size_t size_limit;
|
|
||||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
|
||||||
if(size_limit<size)
|
|
||||||
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
|
|
||||||
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class DataType ,
|
|
||||||
class Arg1Type = void ,
|
|
||||||
class Arg2Type = void ,
|
|
||||||
class Arg3Type = void>
|
|
||||||
class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
//! \name Typedefs for device types and various Kokkos::View specializations.
|
|
||||||
//@{
|
|
||||||
typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
|
|
||||||
|
|
||||||
//! The type of a Kokkos::View on the device.
|
|
||||||
typedef Kokkos::View< typename traits::data_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
typename traits::memory_space ,
|
|
||||||
Kokkos::MemoryUnmanaged > t_dev ;
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
|
||||||
Kokkos::View<t_dev*,typename traits::memory_space> segments_;
|
|
||||||
|
|
||||||
Kokkos::View<int,typename traits::memory_space> realloc_lock;
|
|
||||||
Kokkos::View<int,typename traits::memory_space> nsegments_;
|
|
||||||
|
|
||||||
size_t segment_length_;
|
|
||||||
size_t segment_length_m1_;
|
|
||||||
int max_segments_;
|
|
||||||
|
|
||||||
int segment_length_log2;
|
|
||||||
|
|
||||||
// Dimensions, cardinality, capacity, and offset computation for
|
|
||||||
// multidimensional array view of contiguous memory.
|
|
||||||
// Inherits from Impl::Shape
|
|
||||||
typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
|
|
||||||
, typename traits::array_layout
|
|
||||||
> offset_map_type ;
|
|
||||||
|
|
||||||
offset_map_type m_offset_map ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< typename traits::array_intrinsic_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
typename traits::memory_space ,
|
|
||||||
typename traits::memory_traits > array_type ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< typename traits::const_data_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
typename traits::memory_space ,
|
|
||||||
typename traits::memory_traits > const_type ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< typename traits::non_const_data_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
typename traits::memory_space ,
|
|
||||||
typename traits::memory_traits > non_const_type ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< typename traits::non_const_data_type ,
|
|
||||||
typename traits::array_layout ,
|
|
||||||
HostSpace ,
|
|
||||||
void > HostMirror ;
|
|
||||||
|
|
||||||
template< bool Accessible >
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
|
|
||||||
dimension_0_intern() const { return nsegments_() * segment_length_ ; }
|
|
||||||
|
|
||||||
template< bool Accessible >
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
|
|
||||||
dimension_0_intern() const
|
|
||||||
{
|
|
||||||
// In Host space
|
|
||||||
int n = 0 ;
|
|
||||||
#if ! defined( __CUDA_ARCH__ )
|
|
||||||
Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) );
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return n * segment_length_ ;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
enum { Rank = traits::rank };
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; }
|
|
||||||
|
|
||||||
/* \brief return (current) size of dimension 0 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
|
|
||||||
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
|
||||||
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
|
|
||||||
int n = SegmentedView::dimension_0_intern< Accessible >();
|
|
||||||
return n ;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* \brief return size of dimension 1 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
|
|
||||||
/* \brief return size of dimension 2 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
|
|
||||||
/* \brief return size of dimension 3 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
|
|
||||||
/* \brief return size of dimension 4 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
|
|
||||||
/* \brief return size of dimension 5 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
|
|
||||||
/* \brief return size of dimension 6 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
|
|
||||||
/* \brief return size of dimension 7 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
|
|
||||||
|
|
||||||
/* \brief return size of dimension 2 */
|
|
||||||
KOKKOS_INLINE_FUNCTION typename traits::size_type size() const {
|
|
||||||
return dimension_0() *
|
|
||||||
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
|
|
||||||
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType >
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
typename traits::size_type dimension( const iType & i ) const {
|
|
||||||
if(i==0)
|
|
||||||
return dimension_0();
|
|
||||||
else
|
|
||||||
return Kokkos::Impl::dimension( m_offset_map , i );
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
typename traits::size_type capacity() {
|
|
||||||
return segments_.dimension_0() *
|
|
||||||
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
|
|
||||||
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
typename traits::size_type get_num_segments() {
|
|
||||||
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
|
||||||
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
|
|
||||||
int n = SegmentedView::dimension_0_intern< Accessible >();
|
|
||||||
return n/segment_length_ ;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
typename traits::size_type get_max_segments() {
|
|
||||||
return max_segments_;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Constructor that allocates View objects with an initial length of 0.
|
|
||||||
///
|
|
||||||
/// This constructor works mostly like the analogous constructor of View.
|
|
||||||
/// The first argument is a string label, which is entirely for your
|
|
||||||
/// benefit. (Different SegmentedView objects may have the same label if
|
|
||||||
/// you like.) The second argument 'view_length' is the size of the segments.
|
|
||||||
/// This number must be a power of two. The third argument n0 is the maximum
|
|
||||||
/// value for the first dimension of the segmented view. The maximal allocatable
|
|
||||||
/// number of Segments is thus: (n0+view_length-1)/view_length.
|
|
||||||
/// The arguments that follow are the other dimensions of the (1-7) of the
|
|
||||||
/// View objects. For example, for a View with 3 runtime dimensions,
|
|
||||||
/// the first 4 integer arguments will be nonzero:
|
|
||||||
/// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView
|
|
||||||
/// with a maximum of 306 segments of dimension (32768,8,4). The logical size of
|
|
||||||
/// the segmented view is (n,8,4) with n between 0 and 10000000.
|
|
||||||
/// You may omit the integer arguments that follow.
|
|
||||||
template< class LabelType >
|
|
||||||
SegmentedView(const LabelType & label ,
|
|
||||||
const size_t view_length ,
|
|
||||||
const size_t n0 ,
|
|
||||||
const size_t n1 = 0 ,
|
|
||||||
const size_t n2 = 0 ,
|
|
||||||
const size_t n3 = 0 ,
|
|
||||||
const size_t n4 = 0 ,
|
|
||||||
const size_t n5 = 0 ,
|
|
||||||
const size_t n6 = 0 ,
|
|
||||||
const size_t n7 = 0
|
|
||||||
): segment_length_(view_length),segment_length_m1_(view_length-1)
|
|
||||||
{
|
|
||||||
segment_length_log2 = -1;
|
|
||||||
size_t l = segment_length_;
|
|
||||||
while(l>0) {
|
|
||||||
l>>=1;
|
|
||||||
segment_length_log2++;
|
|
||||||
}
|
|
||||||
l = 1<<segment_length_log2;
|
|
||||||
if(l!=segment_length_)
|
|
||||||
Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
|
|
||||||
|
|
||||||
max_segments_ = (n0+segment_length_m1_)/segment_length_;
|
|
||||||
|
|
||||||
Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
|
|
||||||
|
|
||||||
segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
|
|
||||||
realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
|
|
||||||
nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
|
|
||||||
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
SegmentedView(const SegmentedView& src):
|
|
||||||
segments_(src.segments_),
|
|
||||||
realloc_lock (src.realloc_lock),
|
|
||||||
nsegments_ (src.nsegments_),
|
|
||||||
segment_length_(src.segment_length_),
|
|
||||||
segment_length_m1_(src.segment_length_m1_),
|
|
||||||
max_segments_ (src.max_segments_),
|
|
||||||
segment_length_log2(src.segment_length_log2),
|
|
||||||
m_offset_map (src.m_offset_map)
|
|
||||||
{}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
SegmentedView& operator= (const SegmentedView& src) {
|
|
||||||
segments_ = src.segments_;
|
|
||||||
realloc_lock = src.realloc_lock;
|
|
||||||
nsegments_ = src.nsegments_;
|
|
||||||
segment_length_= src.segment_length_;
|
|
||||||
segment_length_m1_= src.segment_length_m1_;
|
|
||||||
max_segments_ = src.max_segments_;
|
|
||||||
segment_length_log2= src.segment_length_log2;
|
|
||||||
m_offset_map = src.m_offset_map;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
~SegmentedView() {
|
|
||||||
if ( !segments_.tracker().ref_counting()) { return; }
|
|
||||||
size_t ref_count = segments_.tracker().ref_count();
|
|
||||||
if(ref_count == 1u) {
|
|
||||||
Kokkos::fence();
|
|
||||||
typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews");
|
|
||||||
Kokkos::deep_copy(h_nviews,nsegments_);
|
|
||||||
Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
t_dev get_segment(const int& i) const {
|
|
||||||
return segments_[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class MemberType>
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void grow (MemberType& team_member, const size_t& growSize) const {
|
|
||||||
if (growSize>max_segments_*segment_length_) {
|
|
||||||
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(team_member.team_rank()==0) {
|
|
||||||
bool too_small = growSize > segment_length_ * nsegments_();
|
|
||||||
if (too_small) {
|
|
||||||
while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
|
|
||||||
; // get the lock
|
|
||||||
too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
|
|
||||||
if(too_small) {
|
|
||||||
while(too_small) {
|
|
||||||
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
|
|
||||||
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
|
|
||||||
typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
|
|
||||||
|
|
||||||
segments_(nsegments_()) =
|
|
||||||
t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
|
|
||||||
nsegments_()++;
|
|
||||||
too_small = growSize > segment_length_ * nsegments_();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
realloc_lock() = 0; //release the lock
|
|
||||||
}
|
|
||||||
}
|
|
||||||
team_member.team_barrier();
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void grow_non_thread_safe (const size_t& growSize) const {
|
|
||||||
if (growSize>max_segments_*segment_length_) {
|
|
||||||
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
bool too_small = growSize > segment_length_ * nsegments_();
|
|
||||||
if(too_small) {
|
|
||||||
while(too_small) {
|
|
||||||
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
|
|
||||||
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
|
|
||||||
typename traits::non_const_value_type* const ptr =
|
|
||||||
new typename traits::non_const_value_type[alloc_size];
|
|
||||||
|
|
||||||
segments_(nsegments_()) =
|
|
||||||
t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
|
|
||||||
m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
|
|
||||||
m_offset_map.N6, m_offset_map.N7);
|
|
||||||
nsegments_()++;
|
|
||||||
too_small = growSize > segment_length_ * nsegments_();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 , typename iType1 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
|
||||||
std::is_integral<iType1>::value &&
|
|
||||||
traits::rank == 2 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 , const iType1 & i1 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 , typename iType1 , typename iType2 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
|
||||||
std::is_integral<iType1>::value &&
|
|
||||||
std::is_integral<iType2>::value &&
|
|
||||||
traits::rank == 3 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
|
||||||
std::is_integral<iType1>::value &&
|
|
||||||
std::is_integral<iType2>::value &&
|
|
||||||
std::is_integral<iType3>::value &&
|
|
||||||
traits::rank == 4 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
|
||||||
typename iType4 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
|
||||||
std::is_integral<iType1>::value &&
|
|
||||||
std::is_integral<iType2>::value &&
|
|
||||||
std::is_integral<iType3>::value &&
|
|
||||||
std::is_integral<iType4>::value &&
|
|
||||||
traits::rank == 5 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
|
||||||
const iType4 & i4 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
|
||||||
typename iType4 , typename iType5 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
|
||||||
std::is_integral<iType1>::value &&
|
|
||||||
std::is_integral<iType2>::value &&
|
|
||||||
std::is_integral<iType3>::value &&
|
|
||||||
std::is_integral<iType4>::value &&
|
|
||||||
std::is_integral<iType5>::value &&
|
|
||||||
traits::rank == 6 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
|
||||||
const iType4 & i4 , const iType5 & i5 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
|
||||||
typename iType4 , typename iType5 , typename iType6 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
|
||||||
std::is_integral<iType1>::value &&
|
|
||||||
std::is_integral<iType2>::value &&
|
|
||||||
std::is_integral<iType3>::value &&
|
|
||||||
std::is_integral<iType4>::value &&
|
|
||||||
std::is_integral<iType5>::value &&
|
|
||||||
std::is_integral<iType6>::value &&
|
|
||||||
traits::rank == 7 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
|
||||||
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
|
|
||||||
}
|
|
||||||
|
|
||||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
|
||||||
typename iType4 , typename iType5 , typename iType6 , typename iType7 >
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
|
||||||
std::is_integral<iType1>::value &&
|
|
||||||
std::is_integral<iType2>::value &&
|
|
||||||
std::is_integral<iType3>::value &&
|
|
||||||
std::is_integral<iType4>::value &&
|
|
||||||
std::is_integral<iType5>::value &&
|
|
||||||
std::is_integral<iType6>::value &&
|
|
||||||
std::is_integral<iType7>::value &&
|
|
||||||
traits::rank == 8 )
|
|
||||||
, typename traits::value_type &
|
|
||||||
>::type
|
|
||||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
|
||||||
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
|
|
||||||
{
|
|
||||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
|
|
||||||
struct delete_segmented_view {
|
|
||||||
typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
|
|
||||||
typedef typename view_type::execution_space execution_space;
|
|
||||||
|
|
||||||
view_type view_;
|
|
||||||
delete_segmented_view(view_type view):view_(view) {
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (int i) const {
|
|
||||||
delete [] view_.get_segment(i).ptr_on_device();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@ -1,226 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_STATICCRSGRAPH_HPP
|
|
||||||
#define KOKKOS_STATICCRSGRAPH_HPP
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
/// \class StaticCrsGraph
|
|
||||||
/// \brief Compressed row storage array.
|
|
||||||
///
|
|
||||||
/// \tparam DataType The type of stored entries. If a StaticCrsGraph is
|
|
||||||
/// used as the graph of a sparse matrix, then this is usually an
|
|
||||||
/// integer type, the type of the column indices in the sparse
|
|
||||||
/// matrix.
|
|
||||||
///
|
|
||||||
/// \tparam Arg1Type The second template parameter, corresponding
|
|
||||||
/// either to the Device type (if there are no more template
|
|
||||||
/// parameters) or to the Layout type (if there is at least one more
|
|
||||||
/// template parameter).
|
|
||||||
///
|
|
||||||
/// \tparam Arg2Type The third template parameter, which if provided
|
|
||||||
/// corresponds to the Device type.
|
|
||||||
///
|
|
||||||
/// \tparam SizeType The type of row offsets. Usually the default
|
|
||||||
/// parameter suffices. However, setting a nondefault value is
|
|
||||||
///   necessary in some cases, for example, if you want to have
|
|
||||||
/// sparse matrices with dimensions (and therefore column indices)
|
|
||||||
/// that fit in \c int, but want to store more than <tt>INT_MAX</tt>
|
|
||||||
/// entries in the sparse matrix.
|
|
||||||
///
|
|
||||||
/// A row has a range of entries:
|
|
||||||
/// <ul>
|
|
||||||
/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
|
|
||||||
/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
|
|
||||||
/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li>
|
|
||||||
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
|
|
||||||
/// </ul>
|
|
||||||
template< class DataType,
|
|
||||||
class Arg1Type,
|
|
||||||
class Arg2Type = void,
|
|
||||||
typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type, void >::size_type>
|
|
||||||
class StaticCrsGraph {
|
|
||||||
private:
|
|
||||||
typedef ViewTraits<DataType*, Arg1Type, Arg2Type, void> traits;
|
|
||||||
|
|
||||||
public:
|
|
||||||
typedef DataType data_type;
|
|
||||||
typedef typename traits::array_layout array_layout;
|
|
||||||
typedef typename traits::execution_space execution_space;
|
|
||||||
typedef typename traits::device_type device_type;
|
|
||||||
typedef SizeType size_type;
|
|
||||||
|
|
||||||
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type;
|
|
||||||
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
|
|
||||||
typedef View< const size_type* , array_layout, device_type > row_map_type;
|
|
||||||
typedef View< DataType* , array_layout, device_type > entries_type;
|
|
||||||
|
|
||||||
entries_type entries;
|
|
||||||
row_map_type row_map;
|
|
||||||
|
|
||||||
//! Construct an empty view.
|
|
||||||
StaticCrsGraph () : entries(), row_map() {}
|
|
||||||
|
|
||||||
//! Copy constructor (shallow copy).
|
|
||||||
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
|
|
||||||
{}
|
|
||||||
|
|
||||||
template<class EntriesType, class RowMapType>
|
|
||||||
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
|
|
||||||
{}
|
|
||||||
|
|
||||||
/** \brief Assign to a view of the rhs array.
|
|
||||||
* If the old view is the last view
|
|
||||||
* then allocated memory is deallocated.
|
|
||||||
*/
|
|
||||||
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
|
|
||||||
entries = rhs.entries;
|
|
||||||
row_map = rhs.row_map;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Destroy this view of the array.
|
|
||||||
* If the last view then allocated memory is deallocated.
|
|
||||||
*/
|
|
||||||
~StaticCrsGraph() {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
size_type numRows() const {
|
|
||||||
return (row_map.dimension_0 () != 0) ?
|
|
||||||
row_map.dimension_0 () - static_cast<size_type> (1) :
|
|
||||||
static_cast<size_type> (0);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// \brief Create a graph of type \c StaticCrsGraphType labeled \c label
///   from a flat vector.
///
/// Only declared here.
/// NOTE(review): the definition is presumably supplied by
/// impl/Kokkos_StaticCrsGraph_factory.hpp (included later in this file),
/// and \c input presumably holds one count per row -- confirm there.
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph(
  const std::string & label ,
  const std::vector< InputSizeType > & input );

/// \brief Create a graph of type \c StaticCrsGraphType labeled \c label
///   from a vector of vectors.
///
/// Only declared here.
/// NOTE(review): presumably \c input[i] lists the entries of row \c i;
/// confirm against the definition in the factory header.
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph(
  const std::string & label ,
  const std::vector< std::vector< InputSizeType > > & input );
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
template< class DataType ,
|
|
||||||
class Arg1Type ,
|
|
||||||
class Arg2Type ,
|
|
||||||
typename SizeType >
|
|
||||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
|
||||||
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
|
||||||
|
|
||||||
template< class DataType ,
|
|
||||||
class Arg1Type ,
|
|
||||||
class Arg2Type ,
|
|
||||||
typename SizeType >
|
|
||||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
|
||||||
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template< class GraphType >
|
|
||||||
struct StaticCrsGraphMaximumEntry {
|
|
||||||
|
|
||||||
typedef typename GraphType::execution_space execution_space ;
|
|
||||||
typedef typename GraphType::data_type value_type ;
|
|
||||||
|
|
||||||
const typename GraphType::entries_type entries ;
|
|
||||||
|
|
||||||
StaticCrsGraphMaximumEntry( const GraphType & graph ) : entries( graph.entries ) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( const unsigned i , value_type & update ) const
|
|
||||||
{ if ( update < entries(i) ) update = entries(i); }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void init( value_type & update ) const
|
|
||||||
{ update = 0 ; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void join( volatile value_type & update ,
|
|
||||||
volatile const value_type & input ) const
|
|
||||||
{ if ( update < input ) update = input ; }
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
|
|
||||||
DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
|
|
||||||
{
|
|
||||||
typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
|
|
||||||
typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
|
|
||||||
|
|
||||||
DataType result = 0 ;
|
|
||||||
Kokkos::parallel_reduce( graph.entries.dimension_0(),
|
|
||||||
FunctorType(graph), result );
|
|
||||||
return result ;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
#endif /* #ifndef KOKKOS_STATICCRSGRAPH_HPP */
|
|
||||||
|
|
||||||
@ -1,848 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
/// \file Kokkos_UnorderedMap.hpp
|
|
||||||
/// \brief Declaration and definition of Kokkos::UnorderedMap.
|
|
||||||
///
|
|
||||||
/// This header file declares and defines Kokkos::UnorderedMap and its
|
|
||||||
/// related nonmember functions.
|
|
||||||
|
|
||||||
#ifndef KOKKOS_UNORDERED_MAP_HPP
|
|
||||||
#define KOKKOS_UNORDERED_MAP_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
#include <Kokkos_Functional.hpp>
|
|
||||||
|
|
||||||
#include <Kokkos_Bitset.hpp>
|
|
||||||
|
|
||||||
#include <impl/Kokkos_Traits.hpp>
|
|
||||||
#include <impl/Kokkos_UnorderedMap_impl.hpp>
|
|
||||||
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdexcept>
|
|
||||||
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
/// Sentinel index with all bits set.  UnorderedMapInsertResult stores it
/// as its index until an insert records a real one.
enum { UnorderedMapInvalidIndex = ~0u };
|
|
||||||
|
|
||||||
/// \brief First element of the return value of UnorderedMap::insert().
|
|
||||||
///
|
|
||||||
/// Inserting an element into an UnorderedMap is not guaranteed to
|
|
||||||
/// succeed. There are three possible conditions:
|
|
||||||
/// <ol>
|
|
||||||
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
|
|
||||||
/// means that the UnorderedMap ran out of space. </li>
|
|
||||||
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
|
|
||||||
/// did <i>not</i> exist in the table before. </li>
|
|
||||||
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
|
|
||||||
/// <i>did</i> exist in the table before. The new value was
|
|
||||||
/// ignored and the old value was left in place. </li>
|
|
||||||
/// </ol>
|
|
||||||
|
|
||||||
class UnorderedMapInsertResult
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
enum Status{
|
|
||||||
SUCCESS = 1u << 31
|
|
||||||
, EXISTING = 1u << 30
|
|
||||||
, FREED_EXISTING = 1u << 29
|
|
||||||
, LIST_LENGTH_MASK = ~(SUCCESS | EXISTING | FREED_EXISTING)
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
/// Did the map successful insert the key/value pair
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool success() const { return (m_status & SUCCESS); }
|
|
||||||
|
|
||||||
/// Was the key already present in the map
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool existing() const { return (m_status & EXISTING); }
|
|
||||||
|
|
||||||
/// Did the map fail to insert the key due to insufficent capacity
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool failed() const { return m_index == UnorderedMapInvalidIndex; }
|
|
||||||
|
|
||||||
/// Did the map lose a race condition to insert a dupulicate key/value pair
|
|
||||||
/// where an index was claimed that needed to be released
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
bool freed_existing() const { return (m_status & FREED_EXISTING); }
|
|
||||||
|
|
||||||
/// How many iterations through the insert loop did it take before the
|
|
||||||
/// map returned
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
uint32_t list_position() const { return (m_status & LIST_LENGTH_MASK); }
|
|
||||||
|
|
||||||
/// Index where the key can be found as long as the insert did not fail
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
uint32_t index() const { return m_index; }
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
UnorderedMapInsertResult()
|
|
||||||
: m_index(UnorderedMapInvalidIndex)
|
|
||||||
, m_status(0)
|
|
||||||
{}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
void increment_list_position()
|
|
||||||
{
|
|
||||||
m_status += (list_position() < LIST_LENGTH_MASK) ? 1u : 0u;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
void set_existing(uint32_t i, bool arg_freed_existing)
|
|
||||||
{
|
|
||||||
m_index = i;
|
|
||||||
m_status = EXISTING | (arg_freed_existing ? FREED_EXISTING : 0u) | list_position();
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
void set_success(uint32_t i)
|
|
||||||
{
|
|
||||||
m_index = i;
|
|
||||||
m_status = SUCCESS | list_position();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint32_t m_index;
|
|
||||||
uint32_t m_status;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// \class UnorderedMap
|
|
||||||
/// \brief Thread-safe, performance-portable lookup table.
|
|
||||||
///
|
|
||||||
/// This class provides a lookup table. In terms of functionality,
|
|
||||||
/// this class compares to std::unordered_map (new in C++11).
|
|
||||||
/// "Unordered" means that keys are not stored in any particular
|
|
||||||
/// order, unlike (for example) std::map. "Thread-safe" means that
|
|
||||||
/// lookups, insertion, and deletion are safe to call by multiple
|
|
||||||
/// threads in parallel. "Performance-portable" means that parallel
|
|
||||||
/// performance of these operations is reasonable, on multiple
|
|
||||||
/// hardware platforms. Platforms on which performance has been
|
|
||||||
/// tested include conventional Intel x86 multicore processors, Intel
|
|
||||||
/// Xeon Phi ("MIC"), and NVIDIA GPUs.
|
|
||||||
///
|
|
||||||
/// Parallel performance portability entails design decisions that
|
|
||||||
/// might differ from one's expectation for a sequential interface.
|
|
||||||
/// This particularly affects insertion of single elements. In an
|
|
||||||
/// interface intended for sequential use, insertion might reallocate
|
|
||||||
/// memory if the original allocation did not suffice to hold the new
|
|
||||||
/// element. In this class, insertion does <i>not</i> reallocate
|
|
||||||
/// memory.  This means that it might fail.  insert() returns an
/// UnorderedMapInsertResult which reports the outcome.  There are
/// three possible conditions:
/// <ol>
/// <li> <tt>failed()</tt>: The insert failed.  This usually
///      means that the UnorderedMap ran out of space. </li>
/// <li> <tt>success()</tt>: The insert succeeded, and the key
///      did <i>not</i> exist in the table before. </li>
/// <li> <tt>existing()</tt>: The key <i>did</i> exist in the table
///      before.  The new value was ignored and the old value was
///      left in place. </li>
/// </ol>
|
|
||||||
///
|
|
||||||
/// \tparam Key Type of keys of the lookup table. If \c const, users
|
|
||||||
/// are not allowed to add or remove keys, though they are allowed
|
|
||||||
/// to change values. In that case, the implementation may make
|
|
||||||
/// optimizations specific to the <tt>Device</tt>. For example, if
|
|
||||||
/// <tt>Device</tt> is \c Cuda, it may use texture fetches to access
|
|
||||||
/// keys.
|
|
||||||
///
|
|
||||||
/// \tparam Value Type of values stored in the lookup table. You may use
|
|
||||||
/// \c void here, in which case the table will be a set of keys. If
|
|
||||||
/// \c const, users are not allowed to change entries.
|
|
||||||
/// In that case, the implementation may make
|
|
||||||
/// optimizations specific to the \c Device, such as using texture
|
|
||||||
/// fetches to access values.
|
|
||||||
///
|
|
||||||
/// \tparam Device The Kokkos Device type.
|
|
||||||
///
|
|
||||||
/// \tparam Hasher Definition of the hash function for instances of
|
|
||||||
/// <tt>Key</tt>. The default will calculate a bitwise hash.
|
|
||||||
///
|
|
||||||
/// \tparam EqualTo Definition of the equality function for instances of
|
|
||||||
/// <tt>Key</tt>. The default will do a bitwise equality comparison.
|
|
||||||
///
|
|
||||||
template < typename Key
|
|
||||||
, typename Value
|
|
||||||
, typename Device = Kokkos::DefaultExecutionSpace
|
|
||||||
, typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>
|
|
||||||
, typename EqualTo = pod_equal_to<typename Impl::remove_const<Key>::type>
|
|
||||||
>
|
|
||||||
class UnorderedMap
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
typedef typename ViewTraits<Key,Device,void,void>::host_mirror_space host_mirror_space ;
|
|
||||||
public:
|
|
||||||
//! \name Public types and constants
|
|
||||||
//@{
|
|
||||||
|
|
||||||
//key_types
|
|
||||||
typedef Key declared_key_type;
|
|
||||||
typedef typename Impl::remove_const<declared_key_type>::type key_type;
|
|
||||||
typedef typename Impl::add_const<key_type>::type const_key_type;
|
|
||||||
|
|
||||||
//value_types
|
|
||||||
typedef Value declared_value_type;
|
|
||||||
typedef typename Impl::remove_const<declared_value_type>::type value_type;
|
|
||||||
typedef typename Impl::add_const<value_type>::type const_value_type;
|
|
||||||
|
|
||||||
typedef Device execution_space;
|
|
||||||
typedef Hasher hasher_type;
|
|
||||||
typedef EqualTo equal_to_type;
|
|
||||||
typedef uint32_t size_type;
|
|
||||||
|
|
||||||
//map_types
|
|
||||||
typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type;
|
|
||||||
typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type;
|
|
||||||
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
|
|
||||||
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
|
|
||||||
|
|
||||||
static const bool is_set = Impl::is_same<void,value_type>::value;
|
|
||||||
static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
|
|
||||||
static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
|
|
||||||
|
|
||||||
static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
|
|
||||||
static const bool is_modifiable_map = has_const_key && !has_const_value;
|
|
||||||
static const bool is_const_map = has_const_key && has_const_value;
|
|
||||||
|
|
||||||
|
|
||||||
typedef UnorderedMapInsertResult insert_result;
|
|
||||||
|
|
||||||
typedef UnorderedMap<Key,Value,host_mirror_space,Hasher,EqualTo> HostMirror;
|
|
||||||
|
|
||||||
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
|
|
||||||
|
|
||||||
//@}
|
|
||||||
|
|
||||||
private:
|
|
||||||
enum { invalid_index = ~static_cast<size_type>(0) };
|
|
||||||
|
|
||||||
typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type;
|
|
||||||
|
|
||||||
typedef typename Impl::if_c< is_insertable_map
|
|
||||||
, View< key_type *, execution_space>
|
|
||||||
, View< const key_type *, execution_space, MemoryTraits<RandomAccess> >
|
|
||||||
>::type key_type_view;
|
|
||||||
|
|
||||||
typedef typename Impl::if_c< is_insertable_map || is_modifiable_map
|
|
||||||
, View< impl_value_type *, execution_space>
|
|
||||||
, View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> >
|
|
||||||
>::type value_type_view;
|
|
||||||
|
|
||||||
typedef typename Impl::if_c< is_insertable_map
|
|
||||||
, View< size_type *, execution_space>
|
|
||||||
, View< const size_type *, execution_space, MemoryTraits<RandomAccess> >
|
|
||||||
>::type size_type_view;
|
|
||||||
|
|
||||||
typedef typename Impl::if_c< is_insertable_map
|
|
||||||
, Bitset< execution_space >
|
|
||||||
, ConstBitset< execution_space>
|
|
||||||
>::type bitset_type;
|
|
||||||
|
|
||||||
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
|
|
||||||
enum { num_scalars = 3 };
|
|
||||||
typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view;
|
|
||||||
|
|
||||||
public:
|
|
||||||
//! \name Public member functions
|
|
||||||
//@{
|
|
||||||
|
|
||||||
UnorderedMap()
|
|
||||||
: m_bounded_insert()
|
|
||||||
, m_hasher()
|
|
||||||
, m_equal_to()
|
|
||||||
, m_size()
|
|
||||||
, m_available_indexes()
|
|
||||||
, m_hash_lists()
|
|
||||||
, m_next_index()
|
|
||||||
, m_keys()
|
|
||||||
, m_values()
|
|
||||||
, m_scalars()
|
|
||||||
{}
|
|
||||||
|
|
||||||
/// \brief Constructor
|
|
||||||
///
|
|
||||||
/// \param capacity_hint [in] Initial guess of how many unique keys will be inserted into the map
|
|
||||||
/// \param hash [in] Hasher function for \c Key instances. The
|
|
||||||
/// default value usually suffices.
|
|
||||||
UnorderedMap( size_type capacity_hint, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type() )
|
|
||||||
: m_bounded_insert(true)
|
|
||||||
, m_hasher(hasher)
|
|
||||||
, m_equal_to(equal_to)
|
|
||||||
, m_size()
|
|
||||||
, m_available_indexes(calculate_capacity(capacity_hint))
|
|
||||||
, m_hash_lists(ViewAllocateWithoutInitializing("UnorderedMap hash list"), Impl::find_hash_size(capacity()))
|
|
||||||
, m_next_index(ViewAllocateWithoutInitializing("UnorderedMap next index"), capacity()+1) // +1 so that the *_at functions can always return a valid reference
|
|
||||||
, m_keys("UnorderedMap keys",capacity()+1)
|
|
||||||
, m_values("UnorderedMap values",(is_set? 1 : capacity()+1))
|
|
||||||
, m_scalars("UnorderedMap scalars")
|
|
||||||
{
|
|
||||||
if (!is_insertable_map) {
|
|
||||||
throw std::runtime_error("Cannot construct a non-insertable (i.e. const key_type) unordered_map");
|
|
||||||
}
|
|
||||||
|
|
||||||
Kokkos::deep_copy(m_hash_lists, invalid_index);
|
|
||||||
Kokkos::deep_copy(m_next_index, invalid_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
void reset_failed_insert_flag()
|
|
||||||
{
|
|
||||||
reset_flag(failed_insert_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Build a histogram_type object from this map.
histogram_type get_histogram()
{
  histogram_type histogram(*this);
  return histogram;
}
|
|
||||||
|
|
||||||
//! Clear all entries in the table.
|
|
||||||
void clear()
|
|
||||||
{
|
|
||||||
m_bounded_insert = true;
|
|
||||||
|
|
||||||
if (capacity() == 0) return;
|
|
||||||
|
|
||||||
m_available_indexes.clear();
|
|
||||||
|
|
||||||
Kokkos::deep_copy(m_hash_lists, invalid_index);
|
|
||||||
Kokkos::deep_copy(m_next_index, invalid_index);
|
|
||||||
{
|
|
||||||
const key_type tmp = key_type();
|
|
||||||
Kokkos::deep_copy(m_keys,tmp);
|
|
||||||
}
|
|
||||||
if (is_set){
|
|
||||||
const impl_value_type tmp = impl_value_type();
|
|
||||||
Kokkos::deep_copy(m_values,tmp);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Kokkos::deep_copy(m_scalars, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Change the capacity of the the map
|
|
||||||
///
|
|
||||||
/// If there are no failed inserts the current size of the map will
|
|
||||||
/// be used as a lower bound for the input capacity.
|
|
||||||
/// If the map is not empty and does not have failed inserts
|
|
||||||
/// and the capacity changes then the current data is copied
|
|
||||||
/// into the resized / rehashed map.
|
|
||||||
///
|
|
||||||
/// This is <i>not</i> a device function; it may <i>not</i> be
|
|
||||||
/// called in a parallel kernel.
|
|
||||||
bool rehash(size_type requested_capacity = 0)
|
|
||||||
{
|
|
||||||
const bool bounded_insert = (capacity() == 0) || (size() == 0u);
|
|
||||||
return rehash(requested_capacity, bounded_insert );
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Rebuild the map with at least \c requested_capacity slots.
///
/// \param requested_capacity [in] Desired capacity; raised to the
///   current size() when smaller.
/// \param bounded_insert [in] Value of the bounded-insert flag of the
///   map after the rehash.
///
/// \return \c false (and no change) when this map type is not
///   insertable, \c true otherwise.
bool rehash(size_type requested_capacity, bool bounded_insert)
{
  if(!is_insertable_map) {
    return false;
  }

  const size_type entry_count = size();
  if (requested_capacity < entry_count) {
    requested_capacity = entry_count;
  }

  insertable_map_type rebuilt(requested_capacity, m_hasher, m_equal_to);

  if (entry_count != 0u) {
    // Bounded insert is switched off on the new map while the existing
    // entries are rehashed into it.
    rebuilt.m_bounded_insert = false;
    Impl::UnorderedMapRehash<insertable_map_type> rehasher(rebuilt,*this);
    rehasher.apply();
  }
  rebuilt.m_bounded_insert = bounded_insert;

  *this = rebuilt;

  return true;
}
|
|
||||||
|
|
||||||
/// \brief The number of entries in the table.
|
|
||||||
///
|
|
||||||
/// This method has undefined behavior when erasable() is true.
|
|
||||||
///
|
|
||||||
/// Note that this is not a device function; it cannot be called in
|
|
||||||
/// a parallel kernel. The value is not stored as a variable; it
|
|
||||||
/// must be computed.
|
|
||||||
size_type size() const
|
|
||||||
{
|
|
||||||
if( capacity() == 0u ) return 0u;
|
|
||||||
if (modified()) {
|
|
||||||
m_size = m_available_indexes.count();
|
|
||||||
reset_flag(modified_idx);
|
|
||||||
}
|
|
||||||
return m_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief The current number of failed insert() calls.
|
|
||||||
///
|
|
||||||
/// This is <i>not</i> a device function; it may <i>not</i> be
|
|
||||||
/// called in a parallel kernel. The value is not stored as a
|
|
||||||
/// variable; it must be computed.
|
|
||||||
bool failed_insert() const
|
|
||||||
{
|
|
||||||
return get_flag(failed_insert_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool erasable() const
|
|
||||||
{
|
|
||||||
return is_insertable_map ? get_flag(erasable_idx) : false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool begin_erase()
|
|
||||||
{
|
|
||||||
bool result = !erasable();
|
|
||||||
if (is_insertable_map && result) {
|
|
||||||
execution_space::fence();
|
|
||||||
set_flag(erasable_idx);
|
|
||||||
execution_space::fence();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool end_erase()
|
|
||||||
{
|
|
||||||
bool result = erasable();
|
|
||||||
if (is_insertable_map && result) {
|
|
||||||
execution_space::fence();
|
|
||||||
Impl::UnorderedMapErase<declared_map_type> f(*this);
|
|
||||||
f.apply();
|
|
||||||
execution_space::fence();
|
|
||||||
reset_flag(erasable_idx);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief The maximum number of entries that the table can hold.
|
|
||||||
///
|
|
||||||
/// This <i>is</i> a device function; it may be called in a parallel
|
|
||||||
/// kernel.
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
size_type capacity() const
|
|
||||||
{ return m_available_indexes.size(); }
|
|
||||||
|
|
||||||
/// \brief The number of hash table "buckets."
|
|
||||||
///
|
|
||||||
/// This is different than the number of entries that the table can
|
|
||||||
/// hold. Each key hashes to an index in [0, hash_capacity() - 1].
|
|
||||||
/// That index can hold zero or more entries. This class decides
|
|
||||||
/// what hash_capacity() should be, given the user's upper bound on
|
|
||||||
/// the number of entries the table must be able to hold.
|
|
||||||
///
|
|
||||||
/// This <i>is</i> a device function; it may be called in a parallel
|
|
||||||
/// kernel.
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
size_type hash_capacity() const
|
|
||||||
{ return m_hash_lists.dimension_0(); }
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------
|
|
||||||
//---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
/// This <i>is</i> a device function; it may be called in a parallel
|
|
||||||
/// kernel. As discussed in the class documentation, it need not
|
|
||||||
/// succeed. The return value tells you if it did.
|
|
||||||
///
|
|
||||||
/// \param k [in] The key to attempt to insert.
|
|
||||||
/// \param v [in] The corresponding value to attempt to insert. If
|
|
||||||
/// using this class as a set (with Value = void), then you need not
|
|
||||||
/// provide this value.
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
insert_result insert(key_type const& k, impl_value_type const&v = impl_value_type()) const
|
|
||||||
{
|
|
||||||
insert_result result;
|
|
||||||
|
|
||||||
if ( !is_insertable_map || capacity() == 0u || m_scalars((int)erasable_idx) ) {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( !m_scalars((int)modified_idx) ) {
|
|
||||||
m_scalars((int)modified_idx) = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int volatile & failed_insert_ref = m_scalars((int)failed_insert_idx) ;
|
|
||||||
|
|
||||||
const size_type hash_value = m_hasher(k);
|
|
||||||
const size_type hash_list = hash_value % m_hash_lists.dimension_0();
|
|
||||||
|
|
||||||
size_type * curr_ptr = & m_hash_lists[ hash_list ];
|
|
||||||
size_type new_index = invalid_index ;
|
|
||||||
|
|
||||||
// Force integer multiply to long
|
|
||||||
size_type index_hint = static_cast<size_type>( (static_cast<double>(hash_list) * capacity()) / m_hash_lists.dimension_0());
|
|
||||||
|
|
||||||
size_type find_attempts = 0;
|
|
||||||
|
|
||||||
enum { bounded_find_attempts = 32u };
|
|
||||||
const size_type max_attempts = (m_bounded_insert && (bounded_find_attempts < m_available_indexes.max_hint()) ) ?
|
|
||||||
bounded_find_attempts :
|
|
||||||
m_available_indexes.max_hint();
|
|
||||||
|
|
||||||
bool not_done = true ;
|
|
||||||
|
|
||||||
#if defined( __MIC__ )
|
|
||||||
#pragma noprefetch
|
|
||||||
#endif
|
|
||||||
while ( not_done ) {
|
|
||||||
|
|
||||||
// Continue searching the unordered list for this key,
|
|
||||||
// list will only be appended during insert phase.
|
|
||||||
// Need volatile_load as other threads may be appending.
|
|
||||||
size_type curr = volatile_load(curr_ptr);
|
|
||||||
|
|
||||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
|
||||||
#if defined( __MIC__ )
|
|
||||||
#pragma noprefetch
|
|
||||||
#endif
|
|
||||||
while ( curr != invalid_index && ! m_equal_to( volatile_load(&m_keys[curr]), k) ) {
|
|
||||||
result.increment_list_position();
|
|
||||||
index_hint = curr;
|
|
||||||
curr_ptr = &m_next_index[curr];
|
|
||||||
curr = volatile_load(curr_ptr);
|
|
||||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------
|
|
||||||
// If key already present then return that index.
|
|
||||||
if ( curr != invalid_index ) {
|
|
||||||
|
|
||||||
const bool free_existing = new_index != invalid_index;
|
|
||||||
if ( free_existing ) {
|
|
||||||
// Previously claimed an unused entry that was not inserted.
|
|
||||||
// Release this unused entry immediately.
|
|
||||||
if (!m_available_indexes.reset(new_index) ) {
|
|
||||||
printf("Unable to free existing\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
result.set_existing(curr, free_existing);
|
|
||||||
not_done = false ;
|
|
||||||
}
|
|
||||||
//------------------------------------------------------------
|
|
||||||
// Key is not currently in the map.
|
|
||||||
// If the thread has claimed an entry try to insert now.
|
|
||||||
else {
|
|
||||||
|
|
||||||
//------------------------------------------------------------
|
|
||||||
// If have not already claimed an unused entry then do so now.
|
|
||||||
if (new_index == invalid_index) {
|
|
||||||
|
|
||||||
bool found = false;
|
|
||||||
// use the hash_list as the flag for the search direction
|
|
||||||
Kokkos::tie(found, index_hint) = m_available_indexes.find_any_unset_near( index_hint, hash_list );
|
|
||||||
|
|
||||||
// found and index and this thread set it
|
|
||||||
if ( !found && ++find_attempts >= max_attempts ) {
|
|
||||||
failed_insert_ref = true;
|
|
||||||
not_done = false ;
|
|
||||||
}
|
|
||||||
else if (m_available_indexes.set(index_hint) ) {
|
|
||||||
new_index = index_hint;
|
|
||||||
// Set key and value
|
|
||||||
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]);
|
|
||||||
m_keys[new_index] = k ;
|
|
||||||
|
|
||||||
if (!is_set) {
|
|
||||||
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]);
|
|
||||||
m_values[new_index] = v ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do not proceed until key and value are updated in global memory
|
|
||||||
memory_fence();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (failed_insert_ref) {
|
|
||||||
not_done = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attempt to append claimed entry into the list.
|
|
||||||
// Another thread may also be trying to append the same list so protect with atomic.
|
|
||||||
if ( new_index != invalid_index &&
|
|
||||||
curr == atomic_compare_exchange(curr_ptr, static_cast<size_type>(invalid_index), new_index) ) {
|
|
||||||
// Succeeded in appending
|
|
||||||
result.set_success(new_index);
|
|
||||||
not_done = false ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // while ( not_done )
|
|
||||||
|
|
||||||
return result ;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Erase the entry stored under key \c k, if there is one.
///
/// Nothing is erased unless the map is insertable, has non-zero capacity
/// and the scalar flag at \c erasable_idx is set.
///
/// \return \c true if a valid entry for \c k was found and its bit in
///         \c m_available_indexes was reset; \c false otherwise.
KOKKOS_INLINE_FUNCTION
bool erase(key_type const& k) const
{
  const bool erasing_allowed =
    is_insertable_map && 0u < capacity() && m_scalars((int)erasable_idx);

  if ( !erasing_allowed ) {
    return false;
  }

  // Raise the modified flag; it is written only when not already set.
  if ( ! m_scalars((int)modified_idx) ) {
    m_scalars((int)modified_idx) = true;
  }

  const size_type entry = find(k);
  if ( !valid_at(entry) ) {
    return false;
  }

  m_available_indexes.reset(entry);
  return true;
}
|
|
||||||
|
|
||||||
/// \brief Find the given key \c k, if it exists in the table.
|
|
||||||
///
|
|
||||||
/// \return If the key exists in the table, the index of the
|
|
||||||
/// value corresponding to that key; otherwise, an invalid index.
|
|
||||||
///
|
|
||||||
/// This <i>is</i> a device function; it may be called in a parallel
|
|
||||||
/// kernel.
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
size_type find( const key_type & k) const
|
|
||||||
{
|
|
||||||
size_type curr = 0u < capacity() ? m_hash_lists( m_hasher(k) % m_hash_lists.dimension_0() ) : invalid_index ;
|
|
||||||
|
|
||||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
|
||||||
while (curr != invalid_index && !m_equal_to( m_keys[curr], k) ) {
|
|
||||||
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
|
||||||
curr = m_next_index[curr];
|
|
||||||
}
|
|
||||||
|
|
||||||
return curr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Does the key exist in the map
|
|
||||||
///
|
|
||||||
/// This <i>is</i> a device function; it may be called in a parallel
|
|
||||||
/// kernel.
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
bool exists( const key_type & k) const
|
|
||||||
{
|
|
||||||
return valid_at(find(k));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// \brief Get the value with \c i as its direct index.
|
|
||||||
///
|
|
||||||
/// \param i [in] Index directly into the array of entries.
|
|
||||||
///
|
|
||||||
/// This <i>is</i> a device function; it may be called in a parallel
|
|
||||||
/// kernel.
|
|
||||||
///
|
|
||||||
/// 'const value_type' via Cuda texture fetch must return by value.
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
typename Impl::if_c< (is_set || has_const_value), impl_value_type, impl_value_type &>::type
|
|
||||||
value_at(size_type i) const
|
|
||||||
{
|
|
||||||
return m_values[ is_set ? 0 : (i < capacity() ? i : capacity()) ];
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Get the key with \c i as its direct index.
|
|
||||||
///
|
|
||||||
/// \param i [in] Index directly into the array of entries.
|
|
||||||
///
|
|
||||||
/// This <i>is</i> a device function; it may be called in a parallel
|
|
||||||
/// kernel.
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
key_type key_at(size_type i) const
|
|
||||||
{
|
|
||||||
return m_keys[ i < capacity() ? i : capacity() ];
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Test whether the bit for entry \c i is set in \c m_available_indexes.
KOKKOS_FORCEINLINE_FUNCTION
bool valid_at(size_type i) const
{
  const bool bit_is_set = m_available_indexes.test(i);
  return bit_is_set;
}
|
|
||||||
|
|
||||||
template <typename SKey, typename SValue>
|
|
||||||
UnorderedMap( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src,
|
|
||||||
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value,int>::type = 0
|
|
||||||
)
|
|
||||||
: m_bounded_insert(src.m_bounded_insert)
|
|
||||||
, m_hasher(src.m_hasher)
|
|
||||||
, m_equal_to(src.m_equal_to)
|
|
||||||
, m_size(src.m_size)
|
|
||||||
, m_available_indexes(src.m_available_indexes)
|
|
||||||
, m_hash_lists(src.m_hash_lists)
|
|
||||||
, m_next_index(src.m_next_index)
|
|
||||||
, m_keys(src.m_keys)
|
|
||||||
, m_values(src.m_values)
|
|
||||||
, m_scalars(src.m_scalars)
|
|
||||||
{}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename SKey, typename SValue>
|
|
||||||
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value
|
|
||||||
,declared_map_type & >::type
|
|
||||||
operator=( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src)
|
|
||||||
{
|
|
||||||
m_bounded_insert = src.m_bounded_insert;
|
|
||||||
m_hasher = src.m_hasher;
|
|
||||||
m_equal_to = src.m_equal_to;
|
|
||||||
m_size = src.m_size;
|
|
||||||
m_available_indexes = src.m_available_indexes;
|
|
||||||
m_hash_lists = src.m_hash_lists;
|
|
||||||
m_next_index = src.m_next_index;
|
|
||||||
m_keys = src.m_keys;
|
|
||||||
m_values = src.m_values;
|
|
||||||
m_scalars = src.m_scalars;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename SKey, typename SValue, typename SDevice>
|
|
||||||
typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
|
|
||||||
Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
|
|
||||||
>::type
|
|
||||||
create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
|
|
||||||
{
|
|
||||||
if (m_hash_lists.ptr_on_device() != src.m_hash_lists.ptr_on_device()) {
|
|
||||||
|
|
||||||
insertable_map_type tmp;
|
|
||||||
|
|
||||||
tmp.m_bounded_insert = src.m_bounded_insert;
|
|
||||||
tmp.m_hasher = src.m_hasher;
|
|
||||||
tmp.m_equal_to = src.m_equal_to;
|
|
||||||
tmp.m_size = src.size();
|
|
||||||
tmp.m_available_indexes = bitset_type( src.capacity() );
|
|
||||||
tmp.m_hash_lists = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap hash list"), src.m_hash_lists.dimension_0() );
|
|
||||||
tmp.m_next_index = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap next index"), src.m_next_index.dimension_0() );
|
|
||||||
tmp.m_keys = key_type_view( ViewAllocateWithoutInitializing("UnorderedMap keys"), src.m_keys.dimension_0() );
|
|
||||||
tmp.m_values = value_type_view( ViewAllocateWithoutInitializing("UnorderedMap values"), src.m_values.dimension_0() );
|
|
||||||
tmp.m_scalars = scalars_view("UnorderedMap scalars");
|
|
||||||
|
|
||||||
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
|
|
||||||
|
|
||||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
|
|
||||||
|
|
||||||
raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
|
|
||||||
raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
|
|
||||||
raw_deep_copy(tmp.m_keys.ptr_on_device(), src.m_keys.ptr_on_device(), sizeof(key_type)*src.m_keys.dimension_0());
|
|
||||||
if (!is_set) {
|
|
||||||
raw_deep_copy(tmp.m_values.ptr_on_device(), src.m_values.ptr_on_device(), sizeof(impl_value_type)*src.m_values.dimension_0());
|
|
||||||
}
|
|
||||||
raw_deep_copy(tmp.m_scalars.ptr_on_device(), src.m_scalars.ptr_on_device(), sizeof(int)*num_scalars );
|
|
||||||
|
|
||||||
*this = tmp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//@}
|
|
||||||
private: // private member functions
|
|
||||||
|
|
||||||
bool modified() const
|
|
||||||
{
|
|
||||||
return get_flag(modified_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_flag(int flag) const
|
|
||||||
{
|
|
||||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
|
||||||
const int true_ = true;
|
|
||||||
raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
|
|
||||||
}
|
|
||||||
|
|
||||||
void reset_flag(int flag) const
|
|
||||||
{
|
|
||||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
|
||||||
const int false_ = false;
|
|
||||||
raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool get_flag(int flag) const
|
|
||||||
{
|
|
||||||
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
|
|
||||||
int result = false;
|
|
||||||
raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Turn a capacity hint into the capacity actually used.
///
/// A zero hint yields 128. Otherwise the hint is scaled by 7/6 (roughly a
/// 16% increase, truncated) and rounded up to the nearest multiple of 128.
static uint32_t calculate_capacity(uint32_t capacity_hint)
{
  if (capacity_hint == 0u) {
    return 128u;
  }

  // The 64-bit intermediate keeps 7*hint from overflowing before the division.
  const uint32_t padded = static_cast<uint32_t>(7ull*capacity_hint/6u);
  const uint32_t blocks = (padded + 127u)/128u;
  return blocks*128u;
}
|
|
||||||
|
|
||||||
private: // private members
|
|
||||||
bool m_bounded_insert;
|
|
||||||
hasher_type m_hasher;
|
|
||||||
equal_to_type m_equal_to;
|
|
||||||
mutable size_type m_size;
|
|
||||||
bitset_type m_available_indexes;
|
|
||||||
size_type_view m_hash_lists;
|
|
||||||
size_type_view m_next_index;
|
|
||||||
key_type_view m_keys;
|
|
||||||
value_type_view m_values;
|
|
||||||
scalars_view m_scalars;
|
|
||||||
|
|
||||||
template <typename KKey, typename VValue, typename DDevice, typename HHash, typename EEqualTo>
|
|
||||||
friend class UnorderedMap;
|
|
||||||
|
|
||||||
template <typename UMap>
|
|
||||||
friend struct Impl::UnorderedMapErase;
|
|
||||||
|
|
||||||
template <typename UMap>
|
|
||||||
friend struct Impl::UnorderedMapHistogram;
|
|
||||||
|
|
||||||
template <typename UMap>
|
|
||||||
friend struct Impl::UnorderedMapPrint;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Specialization of deep_copy for two UnorderedMap objects.
|
|
||||||
template < typename DKey, typename DT, typename DDevice
|
|
||||||
, typename SKey, typename ST, typename SDevice
|
|
||||||
, typename Hasher, typename EqualTo >
|
|
||||||
inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> & dst
|
|
||||||
, const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> & src )
|
|
||||||
{
|
|
||||||
dst.create_copy_view(src);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
#endif //KOKKOS_UNORDERED_MAP_HPP
|
|
||||||
@ -1,283 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_VECTOR_HPP
|
|
||||||
#define KOKKOS_VECTOR_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Core_fwd.hpp>
|
|
||||||
#include <Kokkos_DualView.hpp>
|
|
||||||
|
|
||||||
/* Drop in replacement for std::vector based on Kokkos::DualView
|
|
||||||
* Most functions only work on the host (it will not compile if called from device kernel)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
template< class Scalar, class Arg1Type = void>
|
|
||||||
class vector : public DualView<Scalar*,LayoutLeft,Arg1Type> {
|
|
||||||
|
|
||||||
typedef Scalar value_type;
|
|
||||||
typedef Scalar* pointer;
|
|
||||||
typedef const Scalar* const_pointer;
|
|
||||||
typedef Scalar* reference;
|
|
||||||
typedef const Scalar* const_reference;
|
|
||||||
typedef Scalar* iterator;
|
|
||||||
typedef const Scalar* const_iterator;
|
|
||||||
|
|
||||||
private:
|
|
||||||
size_t _size;
|
|
||||||
typedef size_t size_type;
|
|
||||||
float _extra_storage;
|
|
||||||
typedef DualView<Scalar*,LayoutLeft,Arg1Type> DV;
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
|
||||||
#ifdef KOKKOS_CUDA_USE_UVM
|
|
||||||
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
|
|
||||||
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
|
|
||||||
#else
|
|
||||||
inline Scalar& operator() (int i) const {return DV::h_view(i);};
|
|
||||||
inline Scalar& operator[] (int i) const {return DV::h_view(i);};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Member functions which behave like std::vector functions */
|
|
||||||
|
|
||||||
vector():DV() {
|
|
||||||
_size = 0;
|
|
||||||
_extra_storage = 1.1;
|
|
||||||
DV::modified_host() = 1;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Arg1Type>("Vector",size_t(n*(1.1))) {
|
|
||||||
_size = n;
|
|
||||||
_extra_storage = 1.1;
|
|
||||||
DV::modified_host() = 1;
|
|
||||||
|
|
||||||
assign(n,val);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void resize(size_t n) {
|
|
||||||
if(n>=capacity())
|
|
||||||
DV::resize(size_t (n*_extra_storage));
|
|
||||||
_size = n;
|
|
||||||
}
|
|
||||||
|
|
||||||
void resize(size_t n, const Scalar& val) {
|
|
||||||
assign(n,val);
|
|
||||||
}
|
|
||||||
|
|
||||||
void assign (size_t n, const Scalar& val) {
|
|
||||||
|
|
||||||
/* Resize if necessary (behavour of std:vector) */
|
|
||||||
|
|
||||||
if(n>capacity())
|
|
||||||
DV::resize(size_t (n*_extra_storage));
|
|
||||||
_size = n;
|
|
||||||
|
|
||||||
/* Assign value either on host or on device */
|
|
||||||
|
|
||||||
if( DV::modified_host() >= DV::modified_device() ) {
|
|
||||||
set_functor_host f(DV::h_view,val);
|
|
||||||
parallel_for(n,f);
|
|
||||||
DV::t_host::execution_space::fence();
|
|
||||||
DV::modified_host()++;
|
|
||||||
} else {
|
|
||||||
set_functor f(DV::d_view,val);
|
|
||||||
parallel_for(n,f);
|
|
||||||
DV::t_dev::execution_space::fence();
|
|
||||||
DV::modified_device()++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void reserve(size_t n) {
|
|
||||||
DV::resize(size_t (n*_extra_storage));
|
|
||||||
}
|
|
||||||
|
|
||||||
void push_back(Scalar val) {
|
|
||||||
DV::modified_host()++;
|
|
||||||
if(_size == capacity()) {
|
|
||||||
size_t new_size = _size*_extra_storage;
|
|
||||||
if(new_size == _size) new_size++;
|
|
||||||
DV::resize(new_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
DV::h_view(_size) = val;
|
|
||||||
_size++;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
void pop_back() {
|
|
||||||
_size--;
|
|
||||||
};
|
|
||||||
|
|
||||||
void clear() {
|
|
||||||
_size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_type size() const {return _size;};
|
|
||||||
size_type max_size() const {return 2000000000;}
|
|
||||||
size_type capacity() const {return DV::capacity();};
|
|
||||||
bool empty() const {return _size==0;};
|
|
||||||
|
|
||||||
iterator begin() const {return &DV::h_view(0);};
|
|
||||||
|
|
||||||
iterator end() const {return &DV::h_view(_size);};
|
|
||||||
|
|
||||||
|
|
||||||
/* std::algorithms wich work originally with iterators, here they are implemented as member functions */
|
|
||||||
|
|
||||||
size_t
|
|
||||||
lower_bound (const size_t& start,
|
|
||||||
const size_t& theEnd,
|
|
||||||
const Scalar& comp_val) const
|
|
||||||
{
|
|
||||||
int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion
|
|
||||||
int upper = _size > theEnd? theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion
|
|
||||||
if (upper <= lower) {
|
|
||||||
return theEnd;
|
|
||||||
}
|
|
||||||
|
|
||||||
Scalar lower_val = DV::h_view(lower);
|
|
||||||
Scalar upper_val = DV::h_view(upper);
|
|
||||||
size_t idx = (upper+lower)/2;
|
|
||||||
Scalar val = DV::h_view(idx);
|
|
||||||
if(val>upper_val) return upper;
|
|
||||||
if(val<lower_val) return start;
|
|
||||||
|
|
||||||
while(upper>lower) {
|
|
||||||
if(comp_val>val) {
|
|
||||||
lower = ++idx;
|
|
||||||
} else {
|
|
||||||
upper = idx;
|
|
||||||
}
|
|
||||||
idx = (upper+lower)/2;
|
|
||||||
val = DV::h_view(idx);
|
|
||||||
}
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_sorted() {
|
|
||||||
for(int i=0;i<_size-1;i++) {
|
|
||||||
if(DV::h_view(i)>DV::h_view(i+1)) return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator find(Scalar val) const {
|
|
||||||
if(_size == 0) return end();
|
|
||||||
|
|
||||||
int upper,lower,current;
|
|
||||||
current = _size/2;
|
|
||||||
upper = _size-1;
|
|
||||||
lower = 0;
|
|
||||||
|
|
||||||
if((val<DV::h_view(0)) || (val>DV::h_view(_size-1)) ) return end();
|
|
||||||
|
|
||||||
while(upper>lower)
|
|
||||||
{
|
|
||||||
if(val>DV::h_view(current)) lower = current+1;
|
|
||||||
else upper = current;
|
|
||||||
current = (upper+lower)/2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(val==DV::h_view(current)) return &DV::h_view(current);
|
|
||||||
else return end();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Additional functions for data management */
|
|
||||||
|
|
||||||
void device_to_host(){
|
|
||||||
deep_copy(DV::h_view,DV::d_view);
|
|
||||||
}
|
|
||||||
void host_to_device() const {
|
|
||||||
deep_copy(DV::d_view,DV::h_view);
|
|
||||||
}
|
|
||||||
|
|
||||||
void on_host() {
|
|
||||||
DV::modified_host() = DV::modified_device() + 1;
|
|
||||||
}
|
|
||||||
void on_device() {
|
|
||||||
DV::modified_device() = DV::modified_host() + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_overallocation(float extra) {
|
|
||||||
_extra_storage = 1.0 + extra;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
|
||||||
struct set_functor {
|
|
||||||
typedef typename DV::t_dev::execution_space execution_space;
|
|
||||||
typename DV::t_dev _data;
|
|
||||||
Scalar _val;
|
|
||||||
|
|
||||||
set_functor(typename DV::t_dev data, Scalar val) :
|
|
||||||
_data(data),_val(val) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int &i) const {
|
|
||||||
_data(i) = _val;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct set_functor_host {
|
|
||||||
typedef typename DV::t_host::execution_space execution_space;
|
|
||||||
typename DV::t_host _data;
|
|
||||||
Scalar _val;
|
|
||||||
|
|
||||||
set_functor_host(typename DV::t_host data, Scalar val) :
|
|
||||||
_data(data),_val(val) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (const int &i) const {
|
|
||||||
_data(i) = _val;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@ -1,173 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_BITSET_IMPL_HPP
|
|
||||||
#define KOKKOS_BITSET_IMPL_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Macros.hpp>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
#include <climits>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
namespace Kokkos { namespace Impl {
|
|
||||||
|
|
||||||
/// \brief Rotate the bits of \c i to the right by \c r positions.
///
/// A rotation count of zero returns \c i unchanged. No reduction of \c r
/// is performed here, so it must be smaller than the bit width of unsigned.
KOKKOS_FORCEINLINE_FUNCTION
unsigned rotate_right(unsigned i, int r)
{
  enum { bit_width = static_cast<int>(sizeof(unsigned)*CHAR_BIT) };

  if (r == 0) {
    return i;
  }

  const unsigned shifted_down = i >> r;
  const unsigned wrapped_up   = i << (bit_width-r);
  return shifted_down | wrapped_up;
}
|
|
||||||
|
|
||||||
/// \brief Zero-based index of the least significant set bit of \c i.
///
/// Uses a compiler intrinsic where one is available and a portable loop
/// otherwise. For \c i == 0 the CUDA and GNU paths and the portable loop
/// return -1.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_forward(unsigned i)
{
#if defined( __CUDA_ARCH__ )
  return __ffs(i) - 1;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(i) - 1;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(i);
#else
  // Portable fallback. The previous loop tested "i & t == 0", which parses
  // as "i & (t == 0)" and therefore never advanced past bit 0.
  if (i == 0u) {
    return -1;
  }

  int r = 0;
  while ((i & 1u) == 0u)
  {
    i >>= 1;
    ++r;
  }
  return r;
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
/// \brief Zero-based index of the most significant set bit of \c i.
///
/// Uses a compiler intrinsic where one is available and a portable loop
/// otherwise. Callers should pass a non-zero value: the GNU builtin is
/// undefined for zero. The portable loop returns -1 for zero.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_reverse(unsigned i)
{
  enum { shift = static_cast<int>(sizeof(unsigned)*CHAR_BIT - 1) };
#if defined( __CUDA_ARCH__ )
  return shift - __clz(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return shift - __builtin_clz(i);
#elif defined( __INTEL_COMPILER )
  return _bit_scan_reverse(i);
#else
  // Portable fallback. The previous loop tested "i & t == 0", which parses
  // as "i & (t == 0)", and it returned the distance from the top bit rather
  // than the bit index produced by the intrinsic paths.
  if (i == 0u) {
    return -1;
  }

  int r = 0;
  while (i >>= 1)
  {
    ++r;
  }
  return r;
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
/// \brief Count the bits set in \c i.
///
/// Uses a compiler intrinsic where one is available; otherwise falls back
/// to the mask-and-add bit count from
/// http://graphics.stanford.edu/~seander/bithacks.html
KOKKOS_FORCEINLINE_FUNCTION
int popcount(unsigned i)
{
#if defined( __CUDA_ARCH__ )
  return __popc(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_popcount(i);
#elif defined ( __INTEL_COMPILER )
  return _popcnt32(i);
#else
  const unsigned pair_mask   = ~0u/3u;         // 0101... pattern
  const unsigned nibble_mask = ~0u/15u*3u;     // 0011... pattern
  const unsigned byte_mask   = ~0u/255u*15u;   // 00001111... pattern
  const unsigned byte_ones   = ~0u/255u;       // 0x01 in every byte

  unsigned v = i - ((i >> 1) & pair_mask);              // 2-bit sums
  v = (v & nibble_mask) + ((v >> 2) & nibble_mask);     // 4-bit sums
  v = (v + (v >> 4)) & byte_mask;                       // 8-bit sums

  // The multiply accumulates all byte sums into the top byte.
  return (int)((v * byte_ones) >> (sizeof(unsigned) - 1) * CHAR_BIT);
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Bitset>
|
|
||||||
struct BitsetCount
|
|
||||||
{
|
|
||||||
typedef Bitset bitset_type;
|
|
||||||
typedef typename bitset_type::execution_space::execution_space execution_space;
|
|
||||||
typedef typename bitset_type::size_type size_type;
|
|
||||||
typedef size_type value_type;
|
|
||||||
|
|
||||||
bitset_type m_bitset;
|
|
||||||
|
|
||||||
BitsetCount( bitset_type const& bitset)
|
|
||||||
: m_bitset(bitset)
|
|
||||||
{}
|
|
||||||
|
|
||||||
size_type apply() const
|
|
||||||
{
|
|
||||||
size_type count = 0u;
|
|
||||||
parallel_reduce(m_bitset.m_blocks.dimension_0(), *this, count);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void init( value_type & count)
|
|
||||||
{
|
|
||||||
count = 0u;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void join( volatile value_type & count, const volatile size_type & incr )
|
|
||||||
{
|
|
||||||
count += incr;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( size_type i, value_type & count) const
|
|
||||||
{
|
|
||||||
count += popcount(m_bitset.m_blocks[i]);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}} //Kokkos::Impl
|
|
||||||
|
|
||||||
#endif // KOKKOS_BITSET_IMPL_HPP
|
|
||||||
|
|
||||||
@ -1,195 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
|
|
||||||
#define KOKKOS_FUNCTIONAL_IMPL_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Macros.hpp>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
namespace Kokkos { namespace Impl {
|
|
||||||
|
|
||||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
|
||||||
// domain. The author hereby disclaims copyright to this source code.
|
|
||||||
/// \brief Assemble the \c i-th 32-bit block of \c p from four bytes,
///        least significant byte first.
///
/// Reading byte-by-byte avoids the aliasing problems a pointer cast could
/// cause under forced inlining.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t getblock32 ( const uint8_t * p, int i )
{
  const uint8_t * const bytes = p + i*4;

  uint32_t word = static_cast<uint32_t>(bytes[0]);
  word |= static_cast<uint32_t>(bytes[1]) << 8;
  word |= static_cast<uint32_t>(bytes[2]) << 16;
  word |= static_cast<uint32_t>(bytes[3]) << 24;
  return word;
}
|
|
||||||
|
|
||||||
/// \brief Rotate the 32-bit value \c x left by \c r bit positions.
///
/// The rotation count is reduced modulo 32, so a count of 0 (or any
/// multiple of 32) returns \c x unchanged. The previous form evaluated
/// "x >> (32 - r)", a shift by the full width for r == 0, which is
/// undefined behaviour.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned left  = static_cast<unsigned>(r) & 31u;
  const unsigned right = (32u - left) & 31u;
  return (x << left) | (x >> right);
}
|
|
||||||
|
|
||||||
/// \brief MurmurHash3 32-bit finalization mix.
///
/// Alternates xor-shifts (16, 13, 16) with two multiplications by fixed
/// odd constants.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t fmix32 ( uint32_t h )
{
  const uint32_t mul_a = 0x85ebca6b;
  const uint32_t mul_b = 0xc2b2ae35;

  h = (h ^ (h >> 16)) * mul_a;
  h = (h ^ (h >> 13)) * mul_b;
  return h ^ (h >> 16);
}
|
|
||||||
|
|
||||||
// 32-bit MurmurHash3 of a byte buffer.
//   key  : start of the buffer
//   len  : buffer length in bytes
//   seed : initial hash state
// The buffer is consumed as len/4 whole 32-bit words (via getblock32),
// followed by up to three trailing bytes; the length is then folded in and
// the state is passed through fmix32.
KOKKOS_INLINE_FUNCTION
uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
{
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint8_t * const bytes = (const uint8_t*)key;
  const int full_words = len / 4;

  uint32_t hash = seed;

  //----------
  // body: one mixing round per whole 32-bit word

  for (int w = 0; w < full_words; ++w)
  {
    uint32_t k = getblock32(bytes, w);

    k *= c1;
    k = rotl32(k, 15);
    k *= c2;

    hash ^= k;
    hash = rotl32(hash, 13);
    hash = hash*5 + 0xe6546b64;
  }

  //----------
  // tail: the 1..3 bytes left over after the last whole word

  const uint8_t * const tail = bytes + full_words*4;
  const int leftover = len & 3;

  if (leftover != 0)
  {
    uint32_t k = 0;

    if (leftover == 3) k ^= tail[2] << 16;
    if (leftover >= 2) k ^= tail[1] << 8;
    k ^= tail[0];

    k *= c1;
    k = rotl32(k, 15);
    k *= c2;

    hash ^= k;
  }

  //----------
  // finalization

  hash ^= len;

  return fmix32(hash);
}
|
|
||||||
|
|
||||||
|
|
||||||
#if defined( __GNUC__ ) /* GNU C */ || \
|
|
||||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
|
||||||
defined( __clang__ )
|
|
||||||
|
|
||||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define KOKKOS_MAY_ALIAS
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Compare the sizeof(T) bytes of *a_ptr and *b_ptr for exact equality.
//   a_ptr, b_ptr : objects to compare
// Returns true when every byte matches.
// The bytes are compared in the widest chunks first: all whole 64-bit
// words, then at most one 32-bit, one 16-bit and one 8-bit remainder.
// The chunk types carry KOKKOS_MAY_ALIAS so the wide reads are not subject
// to type-based alias analysis on compilers that support the attribute.
// NOTE(review): the wide reads appear to assume the objects are suitably
// aligned for 64/32/16-bit access -- confirm for T with small alignment.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS chunk64_t;
  typedef uint32_t KOKKOS_MAY_ALIAS chunk32_t;
  typedef uint16_t KOKKOS_MAY_ALIAS chunk16_t;
  typedef uint8_t  KOKKOS_MAY_ALIAS chunk8_t;

  // Number of whole chunks of each width that fit into one T.
  enum {
    COUNT_8  = sizeof(T),
    COUNT_16 = COUNT_8 / 2,
    COUNT_32 = COUNT_8 / 4,
    COUNT_64 = COUNT_8 / 8
  };

  // One pointer value, viewed at each chunk width.
  union chunk_view {
    T const * const ptr;
    chunk64_t const * const ptr64;
    chunk32_t const * const ptr32;
    chunk16_t const * const ptr16;
    chunk8_t  const * const ptr8;
  };

  const chunk_view lhs = {a_ptr};
  const chunk_view rhs = {b_ptr};

  for (int word = 0; word < COUNT_64; ++word) {
    if ( !(lhs.ptr64[word] == rhs.ptr64[word]) ) return false;
  }

  // A 32-bit chunk remains after the 64-bit words.
  if ( COUNT_64*2 < COUNT_32 ) {
    if ( !(lhs.ptr32[COUNT_64*2] == rhs.ptr32[COUNT_64*2]) ) return false;
  }

  // A 16-bit chunk remains after the 32-bit chunks.
  if ( COUNT_32*2 < COUNT_16 ) {
    if ( !(lhs.ptr16[COUNT_32*2] == rhs.ptr16[COUNT_32*2]) ) return false;
  }

  // A single byte remains after the 16-bit chunks.
  if ( COUNT_16*2 < COUNT_8 ) {
    if ( !(lhs.ptr8[COUNT_16*2] == rhs.ptr8[COUNT_16*2]) ) return false;
  }

  return true;
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef KOKKOS_MAY_ALIAS
|
|
||||||
|
|
||||||
}} // namespace Kokkos::Impl
|
|
||||||
|
|
||||||
#endif //KOKKOS_FUNCTIONAL_IMPL_HPP
|
|
||||||
@ -1,208 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
|
|
||||||
#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
|
||||||
inline
|
|
||||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
|
||||||
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
|
||||||
typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
|
||||||
{
|
|
||||||
return view ;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
|
||||||
inline
|
|
||||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
|
||||||
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view )
|
|
||||||
{
|
|
||||||
// Force copy:
|
|
||||||
//typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
|
|
||||||
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type ;
|
|
||||||
|
|
||||||
typename staticcrsgraph_type::HostMirror tmp ;
|
|
||||||
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
|
|
||||||
|
|
||||||
// Allocation to match:
|
|
||||||
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
|
|
||||||
tmp.entries = create_mirror( view.entries );
|
|
||||||
|
|
||||||
|
|
||||||
// Deep copy:
|
|
||||||
deep_copy( tmp_row_map , view.row_map );
|
|
||||||
deep_copy( tmp.entries , view.entries );
|
|
||||||
|
|
||||||
return tmp ;
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
|
||||||
inline
|
|
||||||
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
|
||||||
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
|
||||||
typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
|
||||||
{
|
|
||||||
return create_mirror( view );
|
|
||||||
}
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
template< class StaticCrsGraphType , class InputSizeType >
|
|
||||||
inline
|
|
||||||
typename StaticCrsGraphType::staticcrsgraph_type
|
|
||||||
create_staticcrsgraph( const std::string & label ,
|
|
||||||
const std::vector< InputSizeType > & input )
|
|
||||||
{
|
|
||||||
typedef StaticCrsGraphType output_type ;
|
|
||||||
//typedef std::vector< InputSizeType > input_type ; // unused
|
|
||||||
|
|
||||||
typedef typename output_type::entries_type entries_type ;
|
|
||||||
|
|
||||||
typedef View< typename output_type::size_type [] ,
|
|
||||||
typename output_type::array_layout ,
|
|
||||||
typename output_type::execution_space > work_type ;
|
|
||||||
|
|
||||||
output_type output ;
|
|
||||||
|
|
||||||
// Create the row map:
|
|
||||||
|
|
||||||
const size_t length = input.size();
|
|
||||||
|
|
||||||
{
|
|
||||||
work_type row_work( "tmp" , length + 1 );
|
|
||||||
|
|
||||||
typename work_type::HostMirror row_work_host =
|
|
||||||
create_mirror_view( row_work );
|
|
||||||
|
|
||||||
size_t sum = 0 ;
|
|
||||||
row_work_host[0] = 0 ;
|
|
||||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
|
||||||
row_work_host[i+1] = sum += input[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
deep_copy( row_work , row_work_host );
|
|
||||||
|
|
||||||
output.entries = entries_type( label , sum );
|
|
||||||
output.row_map = row_work ;
|
|
||||||
}
|
|
||||||
|
|
||||||
return output ;
|
|
||||||
}
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
template< class StaticCrsGraphType , class InputSizeType >
|
|
||||||
inline
|
|
||||||
typename StaticCrsGraphType::staticcrsgraph_type
|
|
||||||
create_staticcrsgraph( const std::string & label ,
|
|
||||||
const std::vector< std::vector< InputSizeType > > & input )
|
|
||||||
{
|
|
||||||
typedef StaticCrsGraphType output_type ;
|
|
||||||
typedef typename output_type::entries_type entries_type ;
|
|
||||||
|
|
||||||
static_assert( entries_type::rank == 1
|
|
||||||
, "Graph entries view must be rank one" );
|
|
||||||
|
|
||||||
typedef View< typename output_type::size_type [] ,
|
|
||||||
typename output_type::array_layout ,
|
|
||||||
typename output_type::execution_space > work_type ;
|
|
||||||
|
|
||||||
output_type output ;
|
|
||||||
|
|
||||||
// Create the row map:
|
|
||||||
|
|
||||||
const size_t length = input.size();
|
|
||||||
|
|
||||||
{
|
|
||||||
work_type row_work( "tmp" , length + 1 );
|
|
||||||
|
|
||||||
typename work_type::HostMirror row_work_host =
|
|
||||||
create_mirror_view( row_work );
|
|
||||||
|
|
||||||
size_t sum = 0 ;
|
|
||||||
row_work_host[0] = 0 ;
|
|
||||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
|
||||||
row_work_host[i+1] = sum += input[i].size();
|
|
||||||
}
|
|
||||||
|
|
||||||
deep_copy( row_work , row_work_host );
|
|
||||||
|
|
||||||
output.entries = entries_type( label , sum );
|
|
||||||
output.row_map = row_work ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fill in the entries:
|
|
||||||
{
|
|
||||||
typename entries_type::HostMirror host_entries =
|
|
||||||
create_mirror_view( output.entries );
|
|
||||||
|
|
||||||
size_t sum = 0 ;
|
|
||||||
for ( size_t i = 0 ; i < length ; ++i ) {
|
|
||||||
for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
|
|
||||||
host_entries( sum ) = input[i][j] ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
deep_copy( output.entries , host_entries );
|
|
||||||
}
|
|
||||||
|
|
||||||
return output ;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Kokkos
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
#endif /* #ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP */
|
|
||||||
|
|
||||||
@ -1,101 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
|
|
||||||
namespace Kokkos { namespace Impl {
|
|
||||||
|
|
||||||
// Pick a hash-table size for a requested capacity.
//   size : requested number of slots
// Returns 0 for a request of 0; otherwise the first table prime that is
// greater than or equal to size, or the last (largest) table prime when
// size exceeds every entry.
uint32_t find_hash_size(uint32_t size)
{
  if (size == 0u) return 0u;

  // these primes try to preserve randomness of hash
  static const uint32_t primes [] = {
        3, 7, 13, 23, 53, 97, 193, 389, 769, 1543
      , 2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539
      , 3727, 3911, 4441 , 4787 , 5119 , 5471 , 5801 , 6143 , 6521 , 6827
      , 7177 , 7517 , 7853 , 8887 , 9587 , 10243 , 10937 , 11617 , 12289
      , 12967 , 13649 , 14341 , 15013 , 15727
      , 17749 , 19121 , 20479 , 21859 , 23209 , 24593 , 25939 , 27329
      , 28669 , 30047 , 31469 , 35507 , 38231 , 40961 , 43711 , 46439
      , 49157 , 51893 , 54617 , 57347 , 60077 , 62801 , 70583 , 75619
      , 80669 , 85703 , 90749 , 95783 , 100823 , 105871 , 110909 , 115963
      , 120997 , 126031 , 141157 , 151237 , 161323 , 171401 , 181499 , 191579
      , 201653 , 211741 , 221813 , 231893 , 241979 , 252079
      , 282311 , 302483 , 322649 , 342803 , 362969 , 383143 , 403301 , 423457
      , 443629 , 463787 , 483953 , 504121 , 564617 , 604949 , 645313 , 685609
      , 725939 , 766273 , 806609 , 846931 , 887261 , 927587 , 967919 , 1008239
      , 1123477 , 1198397 , 1273289 , 1348177 , 1423067 , 1497983 , 1572869
      , 1647761 , 1722667 , 1797581 , 1872461 , 1947359 , 2022253
      , 2246953 , 2396759 , 2546543 , 2696363 , 2846161 , 2995973 , 3145739
      , 3295541 , 3445357 , 3595117 , 3744941 , 3894707 , 4044503
      , 4493921 , 4793501 , 5093089 , 5392679 , 5692279 , 5991883 , 6291469
      , 6591059 , 6890641 , 7190243 , 7489829 , 7789447 , 8089033
      , 8987807 , 9586981 , 10186177 , 10785371 , 11384539 , 11983729
      , 12582917 , 13182109 , 13781291 , 14380469 , 14979667 , 15578861
      , 16178053 , 17895707 , 19014187 , 20132683 , 21251141 , 22369661
      , 23488103 , 24606583 , 25725083 , 26843549 , 27962027 , 29080529
      , 30198989 , 31317469 , 32435981 , 35791397 , 38028379 , 40265327
      , 42502283 , 44739259 , 46976221 , 49213237 , 51450131 , 53687099
      , 55924061 , 58161041 , 60397993 , 62634959 , 64871921
      , 71582857 , 76056727 , 80530643 , 85004567 , 89478503 , 93952427
      , 98426347 , 102900263 , 107374217 , 111848111 , 116322053 , 120795971
      , 125269877 , 129743807 , 143165587 , 152113427 , 161061283 , 170009141
      , 178956983 , 187904819 , 196852693 , 205800547 , 214748383 , 223696237
      , 232644089 , 241591943 , 250539763 , 259487603 , 268435399
  };

  const uint32_t * const last_prime =
    primes + sizeof(primes)/sizeof(uint32_t) - 1;

  // Advance to the first prime that can hold 'size'; stop on the last
  // entry, which doubles as the fallback for oversized requests.
  const uint32_t * candidate = primes;
  while (candidate != last_prime && *candidate < size) {
    ++candidate;
  }

  return *candidate;
}
|
|
||||||
|
|
||||||
}} // namespace Kokkos::Impl
|
|
||||||
|
|
||||||
@ -1,297 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
|
|
||||||
#define KOKKOS_UNORDERED_MAP_IMPL_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Core_fwd.hpp>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
#include <climits>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
namespace Kokkos { namespace Impl {
|
|
||||||
|
|
||||||
uint32_t find_hash_size( uint32_t size );
|
|
||||||
|
|
||||||
template <typename Map>
|
|
||||||
struct UnorderedMapRehash
|
|
||||||
{
|
|
||||||
typedef Map map_type;
|
|
||||||
typedef typename map_type::const_map_type const_map_type;
|
|
||||||
typedef typename map_type::execution_space execution_space;
|
|
||||||
typedef typename map_type::size_type size_type;
|
|
||||||
|
|
||||||
map_type m_dst;
|
|
||||||
const_map_type m_src;
|
|
||||||
|
|
||||||
UnorderedMapRehash( map_type const& dst, const_map_type const& src)
|
|
||||||
: m_dst(dst), m_src(src)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void apply() const
|
|
||||||
{
|
|
||||||
parallel_for(m_src.capacity(), *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(size_type i) const
|
|
||||||
{
|
|
||||||
if ( m_src.valid_at(i) )
|
|
||||||
m_dst.insert(m_src.key_at(i), m_src.value_at(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename UMap>
|
|
||||||
struct UnorderedMapErase
|
|
||||||
{
|
|
||||||
typedef UMap map_type;
|
|
||||||
typedef typename map_type::execution_space execution_space;
|
|
||||||
typedef typename map_type::size_type size_type;
|
|
||||||
typedef typename map_type::key_type key_type;
|
|
||||||
typedef typename map_type::impl_value_type value_type;
|
|
||||||
|
|
||||||
map_type m_map;
|
|
||||||
|
|
||||||
UnorderedMapErase( map_type const& map)
|
|
||||||
: m_map(map)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void apply() const
|
|
||||||
{
|
|
||||||
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( size_type i ) const
|
|
||||||
{
|
|
||||||
const size_type invalid_index = map_type::invalid_index;
|
|
||||||
|
|
||||||
size_type curr = m_map.m_hash_lists(i);
|
|
||||||
size_type next = invalid_index;
|
|
||||||
|
|
||||||
// remove erased head of the linked-list
|
|
||||||
while (curr != invalid_index && !m_map.valid_at(curr)) {
|
|
||||||
next = m_map.m_next_index[curr];
|
|
||||||
m_map.m_next_index[curr] = invalid_index;
|
|
||||||
m_map.m_keys[curr] = key_type();
|
|
||||||
if (m_map.is_set) m_map.m_values[curr] = value_type();
|
|
||||||
curr = next;
|
|
||||||
m_map.m_hash_lists(i) = next;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if the list is non-empty and the head is valid
|
|
||||||
if (curr != invalid_index && m_map.valid_at(curr) ) {
|
|
||||||
size_type prev = curr;
|
|
||||||
curr = m_map.m_next_index[prev];
|
|
||||||
|
|
||||||
while (curr != invalid_index) {
|
|
||||||
next = m_map.m_next_index[curr];
|
|
||||||
if (m_map.valid_at(curr)) {
|
|
||||||
prev = curr;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// remove curr from list
|
|
||||||
m_map.m_next_index[prev] = next;
|
|
||||||
m_map.m_next_index[curr] = invalid_index;
|
|
||||||
m_map.m_keys[curr] = key_type();
|
|
||||||
if (map_type::is_set) m_map.m_values[curr] = value_type();
|
|
||||||
}
|
|
||||||
curr = next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename UMap>
|
|
||||||
struct UnorderedMapHistogram
|
|
||||||
{
|
|
||||||
typedef UMap map_type;
|
|
||||||
typedef typename map_type::execution_space execution_space;
|
|
||||||
typedef typename map_type::size_type size_type;
|
|
||||||
|
|
||||||
typedef View<int[100], execution_space> histogram_view;
|
|
||||||
typedef typename histogram_view::HostMirror host_histogram_view;
|
|
||||||
|
|
||||||
map_type m_map;
|
|
||||||
histogram_view m_length;
|
|
||||||
histogram_view m_distance;
|
|
||||||
histogram_view m_block_distance;
|
|
||||||
|
|
||||||
UnorderedMapHistogram( map_type const& map)
|
|
||||||
: m_map(map)
|
|
||||||
, m_length("UnorderedMap Histogram")
|
|
||||||
, m_distance("UnorderedMap Histogram")
|
|
||||||
, m_block_distance("UnorderedMap Histogram")
|
|
||||||
{}
|
|
||||||
|
|
||||||
void calculate()
|
|
||||||
{
|
|
||||||
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear()
|
|
||||||
{
|
|
||||||
Kokkos::deep_copy(m_length, 0);
|
|
||||||
Kokkos::deep_copy(m_distance, 0);
|
|
||||||
Kokkos::deep_copy(m_block_distance, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_length(std::ostream &out)
|
|
||||||
{
|
|
||||||
host_histogram_view host_copy = create_mirror_view(m_length);
|
|
||||||
Kokkos::deep_copy(host_copy, m_length);
|
|
||||||
|
|
||||||
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
|
||||||
{
|
|
||||||
out << host_copy[i] << " , ";
|
|
||||||
}
|
|
||||||
out << "\b\b\b " << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_distance(std::ostream &out)
|
|
||||||
{
|
|
||||||
host_histogram_view host_copy = create_mirror_view(m_distance);
|
|
||||||
Kokkos::deep_copy(host_copy, m_distance);
|
|
||||||
|
|
||||||
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
|
||||||
{
|
|
||||||
out << host_copy[i] << " , ";
|
|
||||||
}
|
|
||||||
out << "\b\b\b " << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_block_distance(std::ostream &out)
|
|
||||||
{
|
|
||||||
host_histogram_view host_copy = create_mirror_view(m_block_distance);
|
|
||||||
Kokkos::deep_copy(host_copy, m_block_distance);
|
|
||||||
|
|
||||||
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
|
||||||
{
|
|
||||||
out << host_copy[i] << " , ";
|
|
||||||
}
|
|
||||||
out << "\b\b\b " << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( size_type i ) const
|
|
||||||
{
|
|
||||||
const size_type invalid_index = map_type::invalid_index;
|
|
||||||
|
|
||||||
uint32_t length = 0;
|
|
||||||
size_type min_index = ~0u, max_index = 0;
|
|
||||||
for (size_type curr = m_map.m_hash_lists(i); curr != invalid_index; curr = m_map.m_next_index[curr]) {
|
|
||||||
++length;
|
|
||||||
min_index = (curr < min_index) ? curr : min_index;
|
|
||||||
max_index = (max_index < curr) ? curr : max_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_type distance = (0u < length) ? max_index - min_index : 0u;
|
|
||||||
size_type blocks = (0u < length) ? max_index/32u - min_index/32u : 0u;
|
|
||||||
|
|
||||||
// normalize data
|
|
||||||
length = length < 100u ? length : 99u;
|
|
||||||
distance = distance < 100u ? distance : 99u;
|
|
||||||
blocks = blocks < 100u ? blocks : 99u;
|
|
||||||
|
|
||||||
if (0u < length)
|
|
||||||
{
|
|
||||||
atomic_fetch_add( &m_length(length), 1);
|
|
||||||
atomic_fetch_add( &m_distance(distance), 1);
|
|
||||||
atomic_fetch_add( &m_block_distance(blocks), 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename UMap>
|
|
||||||
struct UnorderedMapPrint
|
|
||||||
{
|
|
||||||
typedef UMap map_type;
|
|
||||||
typedef typename map_type::execution_space execution_space;
|
|
||||||
typedef typename map_type::size_type size_type;
|
|
||||||
|
|
||||||
map_type m_map;
|
|
||||||
|
|
||||||
UnorderedMapPrint( map_type const& map)
|
|
||||||
: m_map(map)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void apply()
|
|
||||||
{
|
|
||||||
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( size_type i ) const
|
|
||||||
{
|
|
||||||
const size_type invalid_index = map_type::invalid_index;
|
|
||||||
|
|
||||||
uint32_t list = m_map.m_hash_lists(i);
|
|
||||||
for (size_type curr = list, ii=0; curr != invalid_index; curr = m_map.m_next_index[curr], ++ii) {
|
|
||||||
printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), m_map.value_at(curr));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename DKey, typename DValue, typename SKey, typename SValue>
|
|
||||||
struct UnorderedMapCanAssign : public false_ {};
|
|
||||||
|
|
||||||
template <typename Key, typename Value>
|
|
||||||
struct UnorderedMapCanAssign<Key,Value,Key,Value> : public true_ {};
|
|
||||||
|
|
||||||
template <typename Key, typename Value>
|
|
||||||
struct UnorderedMapCanAssign<const Key,Value,Key,Value> : public true_ {};
|
|
||||||
|
|
||||||
template <typename Key, typename Value>
|
|
||||||
struct UnorderedMapCanAssign<const Key,const Value,Key,Value> : public true_ {};
|
|
||||||
|
|
||||||
template <typename Key, typename Value>
|
|
||||||
struct UnorderedMapCanAssign<const Key,const Value,const Key,Value> : public true_ {};
|
|
||||||
|
|
||||||
|
|
||||||
}} //Kokkos::Impl
|
|
||||||
|
|
||||||
#endif // KOKKOS_UNORDERED_MAP_IMPL_HPP
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
|
|
||||||
# Header search path: build tree, this directory, then the containers sources.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )

# Sources that are always part of the unit-test executable.
# NOTE(review): TestCuda.cpp is listed unconditionally while the other
# backends are guarded by their Kokkos_ENABLE_* option -- confirm intended.
SET(SOURCES UnitTestMain.cpp TestCuda.cpp)

SET(LIBRARIES kokkoscore)

# Per-backend test sources, added only when the backend is enabled.
IF(Kokkos_ENABLE_Pthread)
  LIST( APPEND SOURCES TestThreads.cpp )
ENDIF()

IF(Kokkos_ENABLE_Serial)
  LIST( APPEND SOURCES TestSerial.cpp )
ENDIF()

IF(Kokkos_ENABLE_OpenMP)
  LIST( APPEND SOURCES TestOpenMP.cpp )
ENDIF()

# One executable and one test; output containing "  FAILED  " fails the test.
TRIBITS_ADD_EXECUTABLE_AND_TEST(
  UnitTest
  SOURCES ${SOURCES}
  COMM serial mpi
  NUM_MPI_PROCS 1
  FAIL_REGULAR_EXPRESSION "  FAILED  "
  TESTONLYLIBS kokkos_gtest
  )
|
|
||||||
|
|
||||||
@ -1,92 +0,0 @@
|
|||||||
# Stand-alone build of the Kokkos containers unit tests.
# One executable is built per backend enabled in Makefile.kokkos
# (KOKKOS_INTERNAL_USE_*); 'make test' runs every executable that was built.

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests

# Must stay ahead of the include below so that it remains the default goal.
default: build_all
	echo "End Build"

# These targets are commands, not files: declaring them phony keeps a stray
# file with the same name (e.g. 'test' or 'clean') from suppressing them.
.PHONY: default build_all test test-cuda test-threads test-openmp test-serial clean

include $(KOKKOS_PATH)/Makefile.kokkos

# With CUDA the compiler and linker are forced to the nvcc wrapper;
# otherwise the usual defaults apply unless already set by the caller.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
	CXX = $(NVCC_WRAPPER)
	CXXFLAGS ?= -O3
	LINK = $(CXX)
	LDFLAGS ?= -lpthread
else
	CXX ?= g++
	CXXFLAGS ?= -O3
	LINK ?= $(CXX)
	LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests

# Filled in below, one entry per enabled backend.
TEST_TARGETS =
TARGETS =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
	OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
	TARGETS += KokkosContainers_UnitTest_Cuda
	TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
	OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
	TARGETS += KokkosContainers_UnitTest_Threads
	TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
	OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
	TARGETS += KokkosContainers_UnitTest_OpenMP
	TEST_TARGETS += test-openmp
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
	OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
	TARGETS += KokkosContainers_UnitTest_Serial
	TEST_TARGETS += test-serial
endif

# Link rules, one per backend executable.

KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda

KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads

KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP

KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial

# Run rules, one per backend executable.

test-cuda: KokkosContainers_UnitTest_Cuda
	./KokkosContainers_UnitTest_Cuda

test-threads: KokkosContainers_UnitTest_Threads
	./KokkosContainers_UnitTest_Threads

test-openmp: KokkosContainers_UnitTest_OpenMP
	./KokkosContainers_UnitTest_OpenMP

test-serial: KokkosContainers_UnitTest_Serial
	./KokkosContainers_UnitTest_Serial

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
|
||||||
|
|
||||||
@ -1,285 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_BITSET_HPP
|
|
||||||
#define KOKKOS_TEST_BITSET_HPP
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template <typename Bitset, bool Set>
|
|
||||||
struct TestBitset
|
|
||||||
{
|
|
||||||
typedef Bitset bitset_type;
|
|
||||||
typedef typename bitset_type::execution_space execution_space;
|
|
||||||
typedef uint32_t value_type;
|
|
||||||
|
|
||||||
bitset_type m_bitset;
|
|
||||||
|
|
||||||
TestBitset( bitset_type const& bitset)
|
|
||||||
: m_bitset(bitset)
|
|
||||||
{}
|
|
||||||
|
|
||||||
unsigned testit(unsigned collisions)
|
|
||||||
{
|
|
||||||
execution_space::fence();
|
|
||||||
|
|
||||||
unsigned count = 0;
|
|
||||||
Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, count);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void init( value_type & v ) const { v = 0; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void join( volatile value_type & dst, const volatile value_type & src ) const
|
|
||||||
{ dst += src; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(uint32_t i, value_type & v) const
|
|
||||||
{
|
|
||||||
i = i % m_bitset.size();
|
|
||||||
if (Set) {
|
|
||||||
if (m_bitset.set(i)) {
|
|
||||||
if (m_bitset.test(i)) ++v;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (m_bitset.reset(i)) {
|
|
||||||
if (!m_bitset.test(i)) ++v;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Bitset>
|
|
||||||
struct TestBitsetTest
|
|
||||||
{
|
|
||||||
typedef Bitset bitset_type;
|
|
||||||
typedef typename bitset_type::execution_space execution_space;
|
|
||||||
typedef uint32_t value_type;
|
|
||||||
|
|
||||||
bitset_type m_bitset;
|
|
||||||
|
|
||||||
TestBitsetTest( bitset_type const& bitset)
|
|
||||||
: m_bitset(bitset)
|
|
||||||
{}
|
|
||||||
|
|
||||||
unsigned testit()
|
|
||||||
{
|
|
||||||
execution_space::fence();
|
|
||||||
|
|
||||||
unsigned count = 0;
|
|
||||||
Kokkos::parallel_reduce( m_bitset.size(), *this, count);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void init( value_type & v ) const { v = 0; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void join( volatile value_type & dst, const volatile value_type & src ) const
|
|
||||||
{ dst += src; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(uint32_t i, value_type & v) const
|
|
||||||
{
|
|
||||||
if (m_bitset.test( i )) ++v;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Bitset, bool Set>
|
|
||||||
struct TestBitsetAny
|
|
||||||
{
|
|
||||||
typedef Bitset bitset_type;
|
|
||||||
typedef typename bitset_type::execution_space execution_space;
|
|
||||||
typedef uint32_t value_type;
|
|
||||||
|
|
||||||
bitset_type m_bitset;
|
|
||||||
|
|
||||||
TestBitsetAny( bitset_type const& bitset)
|
|
||||||
: m_bitset(bitset)
|
|
||||||
{}
|
|
||||||
|
|
||||||
unsigned testit()
|
|
||||||
{
|
|
||||||
execution_space::fence();
|
|
||||||
|
|
||||||
unsigned count = 0;
|
|
||||||
Kokkos::parallel_reduce( m_bitset.size(), *this, count);
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void init( value_type & v ) const { v = 0; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void join( volatile value_type & dst, const volatile value_type & src ) const
|
|
||||||
{ dst += src; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(uint32_t i, value_type & v) const
|
|
||||||
{
|
|
||||||
bool result = false;
|
|
||||||
unsigned attempts = 0;
|
|
||||||
uint32_t hint = (i >> 4) << 4;
|
|
||||||
while (attempts < m_bitset.max_hint()) {
|
|
||||||
if (Set) {
|
|
||||||
Kokkos::tie(result, hint) = m_bitset.find_any_unset_near(hint, i);
|
|
||||||
if (result && m_bitset.set(hint)) {
|
|
||||||
++v;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (!result) {
|
|
||||||
++attempts;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Kokkos::tie(result, hint) = m_bitset.find_any_set_near(hint, i);
|
|
||||||
if (result && m_bitset.reset(hint)) {
|
|
||||||
++v;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (!result) {
|
|
||||||
++attempts;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
} // namespace Impl
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
void test_bitset()
|
|
||||||
{
|
|
||||||
typedef Kokkos::Bitset< Device > bitset_type;
|
|
||||||
typedef Kokkos::ConstBitset< Device > const_bitset_type;
|
|
||||||
|
|
||||||
//unsigned test_sizes[] = { 0u, 1000u, 1u<<14, 1u<<16, 10000001 };
|
|
||||||
unsigned test_sizes[] = { 1000u, 1u<<14, 1u<<16, 10000001 };
|
|
||||||
|
|
||||||
for (int i=0, end = sizeof(test_sizes)/sizeof(unsigned); i<end; ++i) {
|
|
||||||
|
|
||||||
//std::cout << "Bitset " << test_sizes[i] << std::endl;
|
|
||||||
|
|
||||||
bitset_type bitset(test_sizes[i]);
|
|
||||||
|
|
||||||
//std::cout << " Check inital count " << std::endl;
|
|
||||||
// nothing should be set
|
|
||||||
{
|
|
||||||
Impl::TestBitsetTest< bitset_type > f(bitset);
|
|
||||||
uint32_t count = f.testit();
|
|
||||||
EXPECT_EQ(0u, count);
|
|
||||||
EXPECT_EQ(count, bitset.count());
|
|
||||||
}
|
|
||||||
|
|
||||||
//std::cout << " Check set() " << std::endl;
|
|
||||||
bitset.set();
|
|
||||||
// everything should be set
|
|
||||||
{
|
|
||||||
Impl::TestBitsetTest< const_bitset_type > f(bitset);
|
|
||||||
uint32_t count = f.testit();
|
|
||||||
EXPECT_EQ(bitset.size(), count);
|
|
||||||
EXPECT_EQ(count, bitset.count());
|
|
||||||
}
|
|
||||||
|
|
||||||
//std::cout << " Check reset() " << std::endl;
|
|
||||||
bitset.reset();
|
|
||||||
EXPECT_EQ(0u, bitset.count());
|
|
||||||
|
|
||||||
//std::cout << " Check set(i) " << std::endl;
|
|
||||||
// test setting bits
|
|
||||||
{
|
|
||||||
Impl::TestBitset< bitset_type, true > f(bitset);
|
|
||||||
uint32_t count = f.testit(10u);
|
|
||||||
EXPECT_EQ( bitset.size(), bitset.count());
|
|
||||||
EXPECT_EQ( bitset.size(), count );
|
|
||||||
}
|
|
||||||
|
|
||||||
//std::cout << " Check reset(i) " << std::endl;
|
|
||||||
// test resetting bits
|
|
||||||
{
|
|
||||||
Impl::TestBitset< bitset_type, false > f(bitset);
|
|
||||||
uint32_t count = f.testit(10u);
|
|
||||||
EXPECT_EQ( bitset.size(), count);
|
|
||||||
EXPECT_EQ( 0u, bitset.count() );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//std::cout << " Check find_any_set(i) " << std::endl;
|
|
||||||
// test setting any bits
|
|
||||||
{
|
|
||||||
Impl::TestBitsetAny< bitset_type, true > f(bitset);
|
|
||||||
uint32_t count = f.testit();
|
|
||||||
EXPECT_EQ( bitset.size(), bitset.count());
|
|
||||||
EXPECT_EQ( bitset.size(), count );
|
|
||||||
}
|
|
||||||
|
|
||||||
//std::cout << " Check find_any_unset(i) " << std::endl;
|
|
||||||
// test resetting any bits
|
|
||||||
{
|
|
||||||
Impl::TestBitsetAny< bitset_type, false > f(bitset);
|
|
||||||
uint32_t count = f.testit();
|
|
||||||
EXPECT_EQ( bitset.size(), count);
|
|
||||||
EXPECT_EQ( 0u, bitset.count() );
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif //KOKKOS_TEST_BITSET_HPP
|
|
||||||
|
|
||||||
@ -1,263 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_COMPLEX_HPP
|
|
||||||
#define KOKKOS_TEST_COMPLEX_HPP
|
|
||||||
|
|
||||||
#include <Kokkos_Complex.hpp>
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
template <typename RealType>
|
|
||||||
void testComplexConstructors () {
|
|
||||||
typedef Kokkos::complex<RealType> complex_type;
|
|
||||||
|
|
||||||
complex_type z1;
|
|
||||||
complex_type z2 (0.0, 0.0);
|
|
||||||
complex_type z3 (1.0, 0.0);
|
|
||||||
complex_type z4 (0.0, 1.0);
|
|
||||||
complex_type z5 (-1.0, -2.0);
|
|
||||||
|
|
||||||
ASSERT_TRUE( z1 == z2 );
|
|
||||||
ASSERT_TRUE( z1 != z3 );
|
|
||||||
ASSERT_TRUE( z1 != z4 );
|
|
||||||
ASSERT_TRUE( z1 != z5 );
|
|
||||||
|
|
||||||
ASSERT_TRUE( z2 != z3 );
|
|
||||||
ASSERT_TRUE( z2 != z4 );
|
|
||||||
ASSERT_TRUE( z2 != z5 );
|
|
||||||
|
|
||||||
ASSERT_TRUE( z3 != z4 );
|
|
||||||
ASSERT_TRUE( z3 != z5 );
|
|
||||||
|
|
||||||
complex_type z6 (-1.0, -2.0);
|
|
||||||
ASSERT_TRUE( z5 == z6 );
|
|
||||||
|
|
||||||
// Make sure that complex has value semantics, in particular, that
|
|
||||||
// equality tests use values and not pointers, so that
|
|
||||||
// reassignment actually changes the value.
|
|
||||||
z1 = complex_type (-3.0, -4.0);
|
|
||||||
ASSERT_TRUE( z1.real () == -3.0 );
|
|
||||||
ASSERT_TRUE( z1.imag () == -4.0 );
|
|
||||||
ASSERT_TRUE( z1 != z2 );
|
|
||||||
|
|
||||||
complex_type z7 (1.0);
|
|
||||||
ASSERT_TRUE( z3 == z7 );
|
|
||||||
ASSERT_TRUE( z7 == 1.0 );
|
|
||||||
ASSERT_TRUE( z7 != -1.0 );
|
|
||||||
|
|
||||||
z7 = complex_type (5.0);
|
|
||||||
ASSERT_TRUE( z7.real () == 5.0 );
|
|
||||||
ASSERT_TRUE( z7.imag () == 0.0 );
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename RealType>
|
|
||||||
void testPlus () {
|
|
||||||
typedef Kokkos::complex<RealType> complex_type;
|
|
||||||
|
|
||||||
complex_type z1 (1.0, -1.0);
|
|
||||||
complex_type z2 (-1.0, 1.0);
|
|
||||||
complex_type z3 = z1 + z2;
|
|
||||||
ASSERT_TRUE( z3 == complex_type (0.0, 0.0) );
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename RealType>
|
|
||||||
void testMinus () {
|
|
||||||
typedef Kokkos::complex<RealType> complex_type;
|
|
||||||
|
|
||||||
// Test binary minus.
|
|
||||||
complex_type z1 (1.0, -1.0);
|
|
||||||
complex_type z2 (-1.0, 1.0);
|
|
||||||
complex_type z3 = z1 - z2;
|
|
||||||
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
|
|
||||||
|
|
||||||
// Test unary minus.
|
|
||||||
complex_type z4 (3.0, -4.0);
|
|
||||||
ASSERT_TRUE( -z1 == complex_type (-3.0, 4.0) );
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename RealType>
|
|
||||||
void testTimes () {
|
|
||||||
typedef Kokkos::complex<RealType> complex_type;
|
|
||||||
|
|
||||||
complex_type z1 (1.0, -1.0);
|
|
||||||
complex_type z2 (-1.0, 1.0);
|
|
||||||
complex_type z3 = z1 * z2;
|
|
||||||
ASSERT_TRUE( z3 == complex_type (0.0, 2.0) );
|
|
||||||
|
|
||||||
// Make sure that std::complex * Kokkos::complex works too.
|
|
||||||
std::complex<RealType> z4 (-1.0, 1.0);
|
|
||||||
complex_type z5 = z4 * z1;
|
|
||||||
ASSERT_TRUE( z5 == complex_type (0.0, 2.0) );
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename RealType>
|
|
||||||
void testDivide () {
|
|
||||||
typedef Kokkos::complex<RealType> complex_type;
|
|
||||||
|
|
||||||
// Test division of a complex number by a real number.
|
|
||||||
complex_type z1 (1.0, -1.0);
|
|
||||||
complex_type z2 (1.0 / 2.0, -1.0 / 2.0);
|
|
||||||
ASSERT_TRUE( z1 / 2.0 == z2 );
|
|
||||||
|
|
||||||
// (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i))
|
|
||||||
// (-1+2i)(1+i) == -3 + i
|
|
||||||
complex_type z3 (-1.0, 2.0);
|
|
||||||
complex_type z4 (1.0, -1.0);
|
|
||||||
complex_type z5 (-3.0, 1.0);
|
|
||||||
ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 );
|
|
||||||
|
|
||||||
// Test division of a complex number by a complex number.
|
|
||||||
// This assumes that RealType is a floating-point type.
|
|
||||||
complex_type z6 (Kokkos::real (z5) / 2.0,
|
|
||||||
Kokkos::imag (z5) / 2.0);
|
|
||||||
|
|
||||||
complex_type z7 = z3 / z4;
|
|
||||||
ASSERT_TRUE( z7 == z6 );
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename RealType>
|
|
||||||
void testOutsideKernel () {
|
|
||||||
testComplexConstructors<RealType> ();
|
|
||||||
testPlus<RealType> ();
|
|
||||||
testTimes<RealType> ();
|
|
||||||
testDivide<RealType> ();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<typename RealType, typename Device>
|
|
||||||
void testCreateView () {
|
|
||||||
typedef Kokkos::complex<RealType> complex_type;
|
|
||||||
Kokkos::View<complex_type*, Device> x ("x", 10);
|
|
||||||
ASSERT_TRUE( x.dimension_0 () == 10 );
|
|
||||||
|
|
||||||
// Test that View assignment works.
|
|
||||||
Kokkos::View<complex_type*, Device> x_nonconst = x;
|
|
||||||
Kokkos::View<const complex_type*, Device> x_const = x;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename RealType, typename Device>
|
|
||||||
class Fill {
|
|
||||||
public:
|
|
||||||
typedef typename Device::execution_space execution_space;
|
|
||||||
|
|
||||||
typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type;
|
|
||||||
typedef typename view_type::size_type size_type;
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator () (const size_type i) const {
|
|
||||||
x_(i) = val_;
|
|
||||||
}
|
|
||||||
|
|
||||||
Fill (const view_type& x, const Kokkos::complex<RealType>& val) :
|
|
||||||
x_ (x), val_ (val)
|
|
||||||
{}
|
|
||||||
|
|
||||||
private:
|
|
||||||
view_type x_;
|
|
||||||
const Kokkos::complex<RealType> val_;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename RealType, typename Device>
|
|
||||||
class Sum {
|
|
||||||
public:
|
|
||||||
typedef typename Device::execution_space execution_space;
|
|
||||||
|
|
||||||
typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type;
|
|
||||||
typedef typename view_type::size_type size_type;
|
|
||||||
typedef Kokkos::complex<RealType> value_type;
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator () (const size_type i, Kokkos::complex<RealType>& sum) const {
|
|
||||||
sum += x_(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
Sum (const view_type& x) : x_ (x) {}
|
|
||||||
|
|
||||||
private:
|
|
||||||
view_type x_;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename RealType, typename Device>
|
|
||||||
void testInsideKernel () {
|
|
||||||
typedef Kokkos::complex<RealType> complex_type;
|
|
||||||
typedef Kokkos::View<complex_type*, Device> view_type;
|
|
||||||
typedef typename view_type::size_type size_type;
|
|
||||||
|
|
||||||
const size_type N = 1000;
|
|
||||||
view_type x ("x", N);
|
|
||||||
ASSERT_TRUE( x.dimension_0 () == N );
|
|
||||||
|
|
||||||
// Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) {
|
|
||||||
// result += x[i];
|
|
||||||
// });
|
|
||||||
|
|
||||||
Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0)));
|
|
||||||
|
|
||||||
complex_type sum;
|
|
||||||
Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum);
|
|
||||||
|
|
||||||
ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 );
|
|
||||||
}
|
|
||||||
} // namespace Impl
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
void testComplex ()
|
|
||||||
{
|
|
||||||
Impl::testOutsideKernel<float> ();
|
|
||||||
Impl::testOutsideKernel<double> ();
|
|
||||||
|
|
||||||
Impl::testCreateView<float, Device> ();
|
|
||||||
Impl::testCreateView<double, Device> ();
|
|
||||||
|
|
||||||
Impl::testInsideKernel<float, Device> ();
|
|
||||||
Impl::testInsideKernel<double, Device> ();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif // KOKKOS_TEST_COMPLEX_HPP
|
|
||||||
@ -1,206 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#include <Kokkos_Bitset.hpp>
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
#include <Kokkos_Vector.hpp>
|
|
||||||
|
|
||||||
#include <TestBitset.hpp>
|
|
||||||
#include <TestUnorderedMap.hpp>
|
|
||||||
#include <TestStaticCrsGraph.hpp>
|
|
||||||
#include <TestVector.hpp>
|
|
||||||
#include <TestDualView.hpp>
|
|
||||||
#include <TestSegmentedView.hpp>
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef KOKKOS_HAVE_CUDA
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
class cuda : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
Kokkos::HostSpace::execution_space::initialize();
|
|
||||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
|
||||||
}
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::Cuda::finalize();
|
|
||||||
Kokkos::HostSpace::execution_space::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Runs both StaticCrsGraph test drivers on the CUDA backend.
TEST_F( cuda , staticcrsgraph )
{
  typedef Kokkos::Cuda device_type;

  TestStaticCrsGraph::run_test_graph< device_type >();
  TestStaticCrsGraph::run_test_graph2< device_type >();
}
|
|
||||||
|
|
||||||
|
|
||||||
void cuda_test_insert_close( uint32_t num_nodes
|
|
||||||
, uint32_t num_inserts
|
|
||||||
, uint32_t num_duplicates
|
|
||||||
)
|
|
||||||
{
|
|
||||||
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_insert_far( uint32_t num_nodes
|
|
||||||
, uint32_t num_inserts
|
|
||||||
, uint32_t num_duplicates
|
|
||||||
)
|
|
||||||
{
|
|
||||||
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_failed_insert( uint32_t num_nodes )
|
|
||||||
{
|
|
||||||
test_failed_insert< Kokkos::Cuda >( num_nodes );
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_deep_copy( uint32_t num_nodes )
|
|
||||||
{
|
|
||||||
test_deep_copy< Kokkos::Cuda >( num_nodes );
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_vector_combinations(unsigned int size)
|
|
||||||
{
|
|
||||||
test_vector_combinations<int,Kokkos::Cuda>(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_dualview_combinations(unsigned int size)
|
|
||||||
{
|
|
||||||
test_dualview_combinations<int,Kokkos::Cuda>(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_segmented_view(unsigned int size)
|
|
||||||
{
|
|
||||||
test_segmented_view<double,Kokkos::Cuda>(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cuda_test_bitset()
|
|
||||||
{
|
|
||||||
test_bitset<Kokkos::Cuda>();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*TEST_F( cuda, bitset )
|
|
||||||
{
|
|
||||||
cuda_test_bitset();
|
|
||||||
}*/
|
|
||||||
|
|
||||||
#define CUDA_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat ) \
|
|
||||||
TEST_F( cuda, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
cuda_test_insert_##name(num_nodes,num_inserts,num_duplicates); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_FAILED_INSERT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( cuda, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
cuda_test_failed_insert(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_ASSIGNEMENT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( cuda, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
cuda_test_assignment_operators(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_DEEP_COPY( num_nodes, repeat ) \
|
|
||||||
TEST_F( cuda, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
cuda_test_deep_copy(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_VECTOR_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( cuda, vector_combination##size##x) { \
|
|
||||||
cuda_test_vector_combinations(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_DUALVIEW_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( cuda, dualview_combination##size##x) { \
|
|
||||||
cuda_test_dualview_combinations(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CUDA_SEGMENTEDVIEW_TEST( size ) \
|
|
||||||
TEST_F( cuda, segmentedview_##size##x) { \
|
|
||||||
cuda_test_segmented_view(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
CUDA_DUALVIEW_COMBINE_TEST( 10 )
|
|
||||||
CUDA_VECTOR_COMBINE_TEST( 10 )
|
|
||||||
CUDA_VECTOR_COMBINE_TEST( 3057 )
|
|
||||||
|
|
||||||
|
|
||||||
CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
|
|
||||||
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)
|
|
||||||
CUDA_DEEP_COPY( 10000, 1 )
|
|
||||||
CUDA_FAILED_INSERT_TEST( 10000, 1000 )
|
|
||||||
CUDA_SEGMENTEDVIEW_TEST( 200 )
|
|
||||||
|
|
||||||
|
|
||||||
#undef CUDA_INSERT_TEST
|
|
||||||
#undef CUDA_FAILED_INSERT_TEST
|
|
||||||
#undef CUDA_ASSIGNEMENT_TEST
|
|
||||||
#undef CUDA_DEEP_COPY
|
|
||||||
#undef CUDA_VECTOR_COMBINE_TEST
|
|
||||||
#undef CUDA_DUALVIEW_COMBINE_TEST
|
|
||||||
#undef CUDA_SEGMENTEDVIEW_TEST
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
|
||||||
|
|
||||||
@ -1,121 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_DUALVIEW_HPP
|
|
||||||
#define KOKKOS_TEST_DUALVIEW_HPP
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template <typename Scalar, class Device>
|
|
||||||
struct test_dualview_combinations
|
|
||||||
{
|
|
||||||
typedef test_dualview_combinations<Scalar,Device> self_type;
|
|
||||||
|
|
||||||
typedef Scalar scalar_type;
|
|
||||||
typedef Device execution_space;
|
|
||||||
|
|
||||||
Scalar reference;
|
|
||||||
Scalar result;
|
|
||||||
|
|
||||||
template <typename ViewType>
|
|
||||||
Scalar run_me(unsigned int n,unsigned int m){
|
|
||||||
if(n<10) n = 10;
|
|
||||||
if(m<3) m = 3;
|
|
||||||
ViewType a("A",n,m);
|
|
||||||
|
|
||||||
Kokkos::deep_copy( a.d_view , 1 );
|
|
||||||
|
|
||||||
a.template modify<typename ViewType::execution_space>();
|
|
||||||
a.template sync<typename ViewType::host_mirror_space>();
|
|
||||||
|
|
||||||
a.h_view(5,1) = 3;
|
|
||||||
a.h_view(6,1) = 4;
|
|
||||||
a.h_view(7,2) = 5;
|
|
||||||
a.template modify<typename ViewType::host_mirror_space>();
|
|
||||||
ViewType b = Kokkos::subview(a,std::pair<unsigned int, unsigned int>(6,9),std::pair<unsigned int, unsigned int>(0,1));
|
|
||||||
a.template sync<typename ViewType::execution_space>();
|
|
||||||
b.template modify<typename ViewType::execution_space>();
|
|
||||||
|
|
||||||
Kokkos::deep_copy( b.d_view , 2 );
|
|
||||||
|
|
||||||
a.template sync<typename ViewType::host_mirror_space>();
|
|
||||||
Scalar count = 0;
|
|
||||||
for(unsigned int i = 0; i<a.d_view.dimension_0(); i++)
|
|
||||||
for(unsigned int j = 0; j<a.d_view.dimension_1(); j++)
|
|
||||||
count += a.h_view(i,j);
|
|
||||||
return count - a.d_view.dimension_0()*a.d_view.dimension_1()-2-4-3*2;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
test_dualview_combinations(unsigned int size)
|
|
||||||
{
|
|
||||||
result = run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >(size,3);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Impl
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Scalar, typename Device>
|
|
||||||
void test_dualview_combinations(unsigned int size)
|
|
||||||
{
|
|
||||||
Impl::test_dualview_combinations<Scalar,Device> test(size);
|
|
||||||
ASSERT_EQ( test.result,0);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif //KOKKOS_TEST_DUALVIEW_HPP
|
|
||||||
@ -1,162 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#include <Kokkos_Bitset.hpp>
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
#include <Kokkos_Vector.hpp>
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
#include <TestBitset.hpp>
|
|
||||||
#include <TestUnorderedMap.hpp>
|
|
||||||
#include <TestStaticCrsGraph.hpp>
|
|
||||||
#include <TestVector.hpp>
|
|
||||||
#include <TestDualView.hpp>
|
|
||||||
#include <TestSegmentedView.hpp>
|
|
||||||
#include <TestComplex.hpp>
|
|
||||||
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
#ifdef KOKKOS_HAVE_OPENMP
|
|
||||||
class openmp : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
|
|
||||||
unsigned threads_count = 4 ;
|
|
||||||
|
|
||||||
if ( Kokkos::hwloc::available() ) {
|
|
||||||
threads_count = Kokkos::hwloc::get_available_numa_count() *
|
|
||||||
Kokkos::hwloc::get_available_cores_per_numa();
|
|
||||||
}
|
|
||||||
|
|
||||||
Kokkos::OpenMP::initialize( threads_count );
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::OpenMP::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Runs the testComplex driver on the OpenMP execution space.
TEST_F( openmp, complex )
{
  testComplex< Kokkos::OpenMP >();
}
|
|
||||||
|
|
||||||
// Runs the test_bitset driver on the OpenMP execution space.
TEST_F( openmp, bitset )
{
  test_bitset< Kokkos::OpenMP >();
}
|
|
||||||
|
|
||||||
// Runs both TestStaticCrsGraph drivers on the OpenMP execution space.
TEST_F( openmp , staticcrsgraph )
{
  TestStaticCrsGraph::run_test_graph < Kokkos::OpenMP >();
  TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
}
|
|
||||||
|
|
||||||
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
|
|
||||||
TEST_F( openmp, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_insert<Kokkos::OpenMP>(num_nodes,num_inserts,num_duplicates, near); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_FAILED_INSERT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( openmp, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_failed_insert<Kokkos::OpenMP>(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_ASSIGNEMENT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( openmp, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_assignement_operators<Kokkos::OpenMP>(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_DEEP_COPY( num_nodes, repeat ) \
|
|
||||||
TEST_F( openmp, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_deep_copy<Kokkos::OpenMP>(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_VECTOR_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( openmp, vector_combination##size##x) { \
|
|
||||||
test_vector_combinations<int,Kokkos::OpenMP>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_DUALVIEW_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( openmp, dualview_combination##size##x) { \
|
|
||||||
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OPENMP_SEGMENTEDVIEW_TEST( size ) \
|
|
||||||
TEST_F( openmp, segmentedview_##size##x) { \
|
|
||||||
test_segmented_view<double,Kokkos::OpenMP>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
|
|
||||||
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
|
||||||
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
|
|
||||||
OPENMP_DEEP_COPY( 10000, 1 )
|
|
||||||
|
|
||||||
OPENMP_VECTOR_COMBINE_TEST( 10 )
|
|
||||||
OPENMP_VECTOR_COMBINE_TEST( 3057 )
|
|
||||||
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
|
|
||||||
OPENMP_SEGMENTEDVIEW_TEST( 10000 )
|
|
||||||
|
|
||||||
#undef OPENMP_INSERT_TEST
|
|
||||||
#undef OPENMP_FAILED_INSERT_TEST
|
|
||||||
#undef OPENMP_ASSIGNEMENT_TEST
|
|
||||||
#undef OPENMP_DEEP_COPY
|
|
||||||
#undef OPENMP_VECTOR_COMBINE_TEST
|
|
||||||
#undef OPENMP_DUALVIEW_COMBINE_TEST
|
|
||||||
#undef OPENMP_SEGMENTEDVIEW_TEST
|
|
||||||
#endif
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
@ -1,708 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP
|
|
||||||
#define KOKKOS_TEST_SEGMENTEDVIEW_HPP
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
|
||||||
|
|
||||||
#include <Kokkos_SegmentedView.hpp>
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
|
|
||||||
struct GrowTest;
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 1> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
a.grow(team_member , team_idx+team_member.team_size());
|
|
||||||
value += team_idx + team_member.team_rank();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+team_member.team_rank()))
|
|
||||||
a(team_idx+team_member.team_rank()) = team_idx+team_member.team_rank();
|
|
||||||
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 2> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
a.grow(team_member , team_idx+ team_member.team_size());
|
|
||||||
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
|
||||||
value += team_idx + team_member.team_rank() + 13*k;
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) {
|
|
||||||
a(team_idx+ team_member.team_rank(),k) =
|
|
||||||
team_idx+ team_member.team_rank() + 13*k;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 3> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
a.grow(team_member , team_idx+ team_member.team_size());
|
|
||||||
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
|
||||||
value += team_idx + team_member.team_rank() + 13*k + 3*l;
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
a(team_idx+ team_member.team_rank(),k,l) =
|
|
||||||
team_idx+ team_member.team_rank() + 13*k + 3*l;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 4> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
a.grow(team_member , team_idx+ team_member.team_size());
|
|
||||||
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
|
||||||
value += team_idx + team_member.team_rank() + 13*k + 3*l + 7*m;
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
a(team_idx+ team_member.team_rank(),k,l,m) =
|
|
||||||
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 5> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
a.grow(team_member , team_idx+ team_member.team_size());
|
|
||||||
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
|
||||||
value +=
|
|
||||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
a(team_idx+ team_member.team_rank(),k,l,m,n) =
|
|
||||||
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 6> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
a.grow(team_member , team_idx+ team_member.team_size());
|
|
||||||
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
|
||||||
value +=
|
|
||||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
|
||||||
a(team_idx+ team_member.team_rank(),k,l,m,n,o) =
|
|
||||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 7> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
a.grow(team_member , team_idx+ team_member.team_size());
|
|
||||||
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
|
||||||
for( typename ExecutionSpace::size_type p=0;p<4;p++)
|
|
||||||
value +=
|
|
||||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
|
||||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
|
||||||
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p) =
|
|
||||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct GrowTest<ViewType , ExecutionSpace , 8> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
GrowTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
a.grow(team_member , team_idx + team_member.team_size());
|
|
||||||
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
|
||||||
for( typename ExecutionSpace::size_type p=0;p<4;p++)
|
|
||||||
for( typename ExecutionSpace::size_type q=0;q<3;q++)
|
|
||||||
value +=
|
|
||||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
|
||||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
|
||||||
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
|
|
||||||
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q) =
|
|
||||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
|
|
||||||
struct VerifyTest;
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 1> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
value += a(team_idx+ team_member.team_rank());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 2> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
value += a(team_idx+ team_member.team_rank(),k);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 3> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
value += a(team_idx+ team_member.team_rank(),k,l);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 4> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
value += a(team_idx+ team_member.team_rank(),k,l,m);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 5> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 6> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
|
||||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 7> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
|
||||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
|
||||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ViewType , class ExecutionSpace>
|
|
||||||
struct VerifyTest<ViewType , ExecutionSpace , 8> {
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
typedef typename Policy::member_type team_type;
|
|
||||||
typedef double value_type;
|
|
||||||
|
|
||||||
ViewType a;
|
|
||||||
|
|
||||||
VerifyTest(ViewType in):a(in) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (team_type team_member, double& value) const {
|
|
||||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
|
||||||
|
|
||||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
|
||||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
|
||||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
|
||||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
|
||||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
|
||||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
|
||||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
|
||||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
|
||||||
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
|
|
||||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Scalar, class ExecutionSpace>
|
|
||||||
struct test_segmented_view
|
|
||||||
{
|
|
||||||
typedef test_segmented_view<Scalar,ExecutionSpace> self_type;
|
|
||||||
|
|
||||||
typedef Scalar scalar_type;
|
|
||||||
typedef ExecutionSpace execution_space;
|
|
||||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
|
||||||
|
|
||||||
double result;
|
|
||||||
double reference;
|
|
||||||
|
|
||||||
template <class ViewType>
|
|
||||||
void run_me(ViewType a, int max_length){
|
|
||||||
const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) );
|
|
||||||
const int nteams = max_length/team_size;
|
|
||||||
|
|
||||||
reference = 0;
|
|
||||||
result = 0;
|
|
||||||
|
|
||||||
Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference);
|
|
||||||
Kokkos::fence();
|
|
||||||
Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result);
|
|
||||||
Kokkos::fence();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
test_segmented_view(unsigned int size,int rank)
|
|
||||||
{
|
|
||||||
reference = 0;
|
|
||||||
result = 0;
|
|
||||||
|
|
||||||
const int dim_1 = 7;
|
|
||||||
const int dim_2 = 3;
|
|
||||||
const int dim_3 = 2;
|
|
||||||
const int dim_4 = 3;
|
|
||||||
const int dim_5 = 2;
|
|
||||||
const int dim_6 = 4;
|
|
||||||
//const int dim_7 = 3;
|
|
||||||
|
|
||||||
if(rank==1) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view;
|
|
||||||
run_me< rank1_view >(rank1_view("Rank1",128,size), size);
|
|
||||||
}
|
|
||||||
if(rank==2) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view;
|
|
||||||
run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size);
|
|
||||||
}
|
|
||||||
if(rank==3) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2],Kokkos::LayoutRight,ExecutionSpace> rank3_view;
|
|
||||||
run_me< rank3_view >(rank3_view("Rank3",128,size), size);
|
|
||||||
}
|
|
||||||
if(rank==4) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view;
|
|
||||||
run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size);
|
|
||||||
}
|
|
||||||
if(rank==5) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view;
|
|
||||||
run_me< rank5_view >(rank5_view("Rank5",128,size), size);
|
|
||||||
}
|
|
||||||
if(rank==6) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view;
|
|
||||||
run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size);
|
|
||||||
}
|
|
||||||
if(rank==7) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view;
|
|
||||||
run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size);
|
|
||||||
}
|
|
||||||
if(rank==8) {
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view;
|
|
||||||
run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Impl
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Scalar, class ExecutionSpace>
|
|
||||||
void test_segmented_view(unsigned int size)
|
|
||||||
{
|
|
||||||
{
|
|
||||||
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type;
|
|
||||||
view_type a("A",128,size,7,3,2,3);
|
|
||||||
double reference;
|
|
||||||
|
|
||||||
Impl::GrowTest<view_type,ExecutionSpace> f(a);
|
|
||||||
|
|
||||||
const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f );
|
|
||||||
const int nteams = (size+team_size-1)/team_size;
|
|
||||||
|
|
||||||
Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference);
|
|
||||||
|
|
||||||
size_t real_size = ((size+127)/128)*128;
|
|
||||||
|
|
||||||
ASSERT_EQ(real_size,a.dimension_0());
|
|
||||||
ASSERT_EQ(7,a.dimension_1());
|
|
||||||
ASSERT_EQ(3,a.dimension_2());
|
|
||||||
ASSERT_EQ(2,a.dimension_3());
|
|
||||||
ASSERT_EQ(3,a.dimension_4());
|
|
||||||
ASSERT_EQ(2,a.dimension_5());
|
|
||||||
ASSERT_EQ(4,a.dimension_6());
|
|
||||||
ASSERT_EQ(3,a.dimension_7());
|
|
||||||
ASSERT_EQ(real_size,a.dimension(0));
|
|
||||||
ASSERT_EQ(7,a.dimension(1));
|
|
||||||
ASSERT_EQ(3,a.dimension(2));
|
|
||||||
ASSERT_EQ(2,a.dimension(3));
|
|
||||||
ASSERT_EQ(3,a.dimension(4));
|
|
||||||
ASSERT_EQ(2,a.dimension(5));
|
|
||||||
ASSERT_EQ(4,a.dimension(6));
|
|
||||||
ASSERT_EQ(3,a.dimension(7));
|
|
||||||
ASSERT_EQ(8,a.Rank);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8);
|
|
||||||
ASSERT_EQ(test.reference,test.result);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
// Fallback used when the preprocessor condition guarding the real test is not
// met: accepts the same call and does nothing.
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int /* size */)
{
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */
|
|
||||||
|
|
||||||
@ -1,158 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#if ! defined(KOKKOS_HAVE_SERIAL)
|
|
||||||
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
|
|
||||||
#else
|
|
||||||
|
|
||||||
#include <Kokkos_Bitset.hpp>
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
#include <Kokkos_Vector.hpp>
|
|
||||||
|
|
||||||
#include <TestBitset.hpp>
|
|
||||||
#include <TestUnorderedMap.hpp>
|
|
||||||
#include <TestStaticCrsGraph.hpp>
|
|
||||||
#include <TestVector.hpp>
|
|
||||||
#include <TestDualView.hpp>
|
|
||||||
#include <TestSegmentedView.hpp>
|
|
||||||
#include <TestComplex.hpp>
|
|
||||||
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
class serial : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase () {
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
Kokkos::Serial::initialize ();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase () {
|
|
||||||
Kokkos::Serial::finalize ();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// Runs both StaticCrsGraph checks on the Serial device.
TEST_F( serial , staticcrsgraph )
{
  typedef Kokkos::Serial device_type ;

  TestStaticCrsGraph::run_test_graph< device_type >();
  TestStaticCrsGraph::run_test_graph2< device_type >();
}
|
|
||||||
|
|
||||||
// Runs the complex-number test on the Serial device.
TEST_F( serial, complex )
{
  typedef Kokkos::Serial device_type ;

  testComplex< device_type >();
}
|
|
||||||
|
|
||||||
// Runs the Bitset test on the Serial device.
TEST_F( serial, bitset )
{
  typedef Kokkos::Serial device_type ;

  test_bitset< device_type >();
}
|
|
||||||
|
|
||||||
#define SERIAL_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
|
|
||||||
TEST_F( serial, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_insert<Kokkos::Serial> (num_nodes, num_inserts, num_duplicates, near); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_FAILED_INSERT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( serial, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_failed_insert<Kokkos::Serial> (num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_ASSIGNEMENT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( serial, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_assignement_operators<Kokkos::Serial> (num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_DEEP_COPY( num_nodes, repeat ) \
|
|
||||||
TEST_F( serial, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_deep_copy<Kokkos::Serial> (num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_VECTOR_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( serial, vector_combination##size##x) { \
|
|
||||||
test_vector_combinations<int,Kokkos::Serial>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_DUALVIEW_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( serial, dualview_combination##size##x) { \
|
|
||||||
test_dualview_combinations<int,Kokkos::Serial>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SERIAL_SEGMENTEDVIEW_TEST( size ) \
|
|
||||||
TEST_F( serial, segmentedview_##size##x) { \
|
|
||||||
test_segmented_view<double,Kokkos::Serial>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
|
|
||||||
SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
|
||||||
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
|
|
||||||
SERIAL_DEEP_COPY( 10000, 1 )
|
|
||||||
|
|
||||||
SERIAL_VECTOR_COMBINE_TEST( 10 )
|
|
||||||
SERIAL_VECTOR_COMBINE_TEST( 3057 )
|
|
||||||
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
|
|
||||||
SERIAL_SEGMENTEDVIEW_TEST( 10000 )
|
|
||||||
|
|
||||||
#undef SERIAL_INSERT_TEST
|
|
||||||
#undef SERIAL_FAILED_INSERT_TEST
|
|
||||||
#undef SERIAL_ASSIGNEMENT_TEST
|
|
||||||
#undef SERIAL_DEEP_COPY
|
|
||||||
#undef SERIAL_VECTOR_COMBINE_TEST
|
|
||||||
#undef SERIAL_DUALVIEW_COMBINE_TEST
|
|
||||||
#undef SERIAL_SEGMENTEDVIEW_TEST
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif // KOKKOS_HAVE_SERIAL
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,149 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <Kokkos_StaticCrsGraph.hpp>
|
|
||||||
|
|
||||||
/*--------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
namespace TestStaticCrsGraph {
|
|
||||||
|
|
||||||
template< class Space >
|
|
||||||
void run_test_graph()
|
|
||||||
{
|
|
||||||
typedef Kokkos::StaticCrsGraph< unsigned , Space > dView ;
|
|
||||||
typedef typename dView::HostMirror hView ;
|
|
||||||
|
|
||||||
const unsigned LENGTH = 1000 ;
|
|
||||||
dView dx ;
|
|
||||||
hView hx ;
|
|
||||||
|
|
||||||
std::vector< std::vector< int > > graph( LENGTH );
|
|
||||||
|
|
||||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
|
||||||
graph[i].reserve(8);
|
|
||||||
for ( size_t j = 0 ; j < 8 ; ++j ) {
|
|
||||||
graph[i].push_back( i + j * 3 );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
dx = Kokkos::create_staticcrsgraph<dView>( "dx" , graph );
|
|
||||||
hx = Kokkos::create_mirror( dx );
|
|
||||||
|
|
||||||
ASSERT_EQ( hx.row_map.dimension_0() - 1 , LENGTH );
|
|
||||||
|
|
||||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
|
||||||
const size_t begin = hx.row_map[i];
|
|
||||||
const size_t n = hx.row_map[i+1] - begin ;
|
|
||||||
ASSERT_EQ( n , graph[i].size() );
|
|
||||||
for ( size_t j = 0 ; j < n ; ++j ) {
|
|
||||||
ASSERT_EQ( (int) hx.entries( j + begin ) , graph[i][j] );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class Space >
|
|
||||||
void run_test_graph2()
|
|
||||||
{
|
|
||||||
typedef Kokkos::StaticCrsGraph< unsigned[3] , Space > dView ;
|
|
||||||
typedef typename dView::HostMirror hView ;
|
|
||||||
|
|
||||||
const unsigned LENGTH = 10 ;
|
|
||||||
|
|
||||||
std::vector< size_t > sizes( LENGTH );
|
|
||||||
|
|
||||||
size_t total_length = 0 ;
|
|
||||||
|
|
||||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
|
||||||
total_length += ( sizes[i] = 6 + i % 4 );
|
|
||||||
}
|
|
||||||
|
|
||||||
dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
|
|
||||||
hView hx = Kokkos::create_mirror( dx );
|
|
||||||
hView mx = Kokkos::create_mirror( dx );
|
|
||||||
|
|
||||||
ASSERT_EQ( (size_t) dx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
|
||||||
ASSERT_EQ( (size_t) hx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
|
||||||
ASSERT_EQ( (size_t) mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
|
||||||
|
|
||||||
ASSERT_EQ( (size_t) dx.entries.dimension_0() , (size_t) total_length );
|
|
||||||
ASSERT_EQ( (size_t) hx.entries.dimension_0() , (size_t) total_length );
|
|
||||||
ASSERT_EQ( (size_t) mx.entries.dimension_0() , (size_t) total_length );
|
|
||||||
|
|
||||||
ASSERT_EQ( (size_t) dx.entries.dimension_1() , (size_t) 3 );
|
|
||||||
ASSERT_EQ( (size_t) hx.entries.dimension_1() , (size_t) 3 );
|
|
||||||
ASSERT_EQ( (size_t) mx.entries.dimension_1() , (size_t) 3 );
|
|
||||||
|
|
||||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
|
||||||
const size_t entry_begin = hx.row_map[i];
|
|
||||||
const size_t entry_end = hx.row_map[i+1];
|
|
||||||
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
|
|
||||||
hx.entries(j,0) = j + 1 ;
|
|
||||||
hx.entries(j,1) = j + 2 ;
|
|
||||||
hx.entries(j,2) = j + 3 ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Kokkos::deep_copy( dx.entries , hx.entries );
|
|
||||||
Kokkos::deep_copy( mx.entries , dx.entries );
|
|
||||||
|
|
||||||
ASSERT_EQ( mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
|
||||||
|
|
||||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
|
||||||
const size_t entry_begin = mx.row_map[i];
|
|
||||||
const size_t entry_end = mx.row_map[i+1];
|
|
||||||
ASSERT_EQ( ( entry_end - entry_begin ) , sizes[i] );
|
|
||||||
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
|
|
||||||
ASSERT_EQ( (size_t) mx.entries( j , 0 ) , ( j + 1 ) );
|
|
||||||
ASSERT_EQ( (size_t) mx.entries( j , 1 ) , ( j + 2 ) );
|
|
||||||
ASSERT_EQ( (size_t) mx.entries( j , 2 ) , ( j + 3 ) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} /* namespace TestStaticCrsGraph */
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,168 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
|
||||||
|
|
||||||
#include <Kokkos_Bitset.hpp>
|
|
||||||
#include <Kokkos_UnorderedMap.hpp>
|
|
||||||
|
|
||||||
#include <Kokkos_Vector.hpp>
|
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
#include <TestBitset.hpp>
|
|
||||||
#include <TestUnorderedMap.hpp>
|
|
||||||
#include <TestStaticCrsGraph.hpp>
|
|
||||||
|
|
||||||
#include <TestVector.hpp>
|
|
||||||
#include <TestDualView.hpp>
|
|
||||||
#include <TestSegmentedView.hpp>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
class threads : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
std::cout << std::setprecision(5) << std::scientific;
|
|
||||||
|
|
||||||
unsigned num_threads = 4;
|
|
||||||
|
|
||||||
if (Kokkos::hwloc::available()) {
|
|
||||||
num_threads = Kokkos::hwloc::get_available_numa_count()
|
|
||||||
* Kokkos::hwloc::get_available_cores_per_numa()
|
|
||||||
// * Kokkos::hwloc::get_available_threads_per_core()
|
|
||||||
;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Threads: " << num_threads << std::endl;
|
|
||||||
|
|
||||||
Kokkos::Threads::initialize( num_threads );
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
Kokkos::Threads::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Runs both StaticCrsGraph checks on the Threads device.
TEST_F( threads , staticcrsgraph )
{
  typedef Kokkos::Threads device_type ;

  TestStaticCrsGraph::run_test_graph< device_type >();
  TestStaticCrsGraph::run_test_graph2< device_type >();
}
|
|
||||||
|
|
||||||
/*TEST_F( threads, bitset )
|
|
||||||
{
|
|
||||||
test_bitset<Kokkos::Threads>();
|
|
||||||
}*/
|
|
||||||
|
|
||||||
#define THREADS_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
|
|
||||||
TEST_F( threads, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_insert<Kokkos::Threads>(num_nodes,num_inserts,num_duplicates, near); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_FAILED_INSERT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( threads, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_failed_insert<Kokkos::Threads>(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_ASSIGNEMENT_TEST( num_nodes, repeat ) \
|
|
||||||
TEST_F( threads, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_assignement_operators<Kokkos::Threads>(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_DEEP_COPY( num_nodes, repeat ) \
|
|
||||||
TEST_F( threads, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
|
|
||||||
for (int i=0; i<repeat; ++i) \
|
|
||||||
test_deep_copy<Kokkos::Threads>(num_nodes); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_VECTOR_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( threads, vector_combination##size##x) { \
|
|
||||||
test_vector_combinations<int,Kokkos::Threads>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_DUALVIEW_COMBINE_TEST( size ) \
|
|
||||||
TEST_F( threads, dualview_combination##size##x) { \
|
|
||||||
test_dualview_combinations<int,Kokkos::Threads>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define THREADS_SEGMENTEDVIEW_TEST( size ) \
|
|
||||||
TEST_F( threads, segmentedview_##size##x) { \
|
|
||||||
test_segmented_view<double,Kokkos::Threads>(size); \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
|
||||||
THREADS_FAILED_INSERT_TEST( 10000, 1000 )
|
|
||||||
THREADS_DEEP_COPY( 10000, 1 )
|
|
||||||
|
|
||||||
THREADS_VECTOR_COMBINE_TEST( 10 )
|
|
||||||
THREADS_VECTOR_COMBINE_TEST( 3057 )
|
|
||||||
THREADS_DUALVIEW_COMBINE_TEST( 10 )
|
|
||||||
THREADS_SEGMENTEDVIEW_TEST( 10000 )
|
|
||||||
|
|
||||||
|
|
||||||
#undef THREADS_INSERT_TEST
|
|
||||||
#undef THREADS_FAILED_INSERT_TEST
|
|
||||||
#undef THREADS_ASSIGNEMENT_TEST
|
|
||||||
#undef THREADS_DEEP_COPY
|
|
||||||
#undef THREADS_VECTOR_COMBINE_TEST
|
|
||||||
#undef THREADS_DUALVIEW_COMBINE_TEST
|
|
||||||
#undef THREADS_SEGMENTEDVIEW_TEST
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
|
||||||
|
|
||||||
@ -1,313 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_UNORDERED_MAP_HPP
|
|
||||||
#define KOKKOS_TEST_UNORDERED_MAP_HPP
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template <typename MapType, bool Near = false>
|
|
||||||
struct TestInsert
|
|
||||||
{
|
|
||||||
typedef MapType map_type;
|
|
||||||
typedef typename map_type::execution_space execution_space;
|
|
||||||
typedef uint32_t value_type;
|
|
||||||
|
|
||||||
map_type map;
|
|
||||||
uint32_t inserts;
|
|
||||||
uint32_t collisions;
|
|
||||||
|
|
||||||
TestInsert( map_type arg_map, uint32_t arg_inserts, uint32_t arg_collisions)
|
|
||||||
: map(arg_map)
|
|
||||||
, inserts(arg_inserts)
|
|
||||||
, collisions(arg_collisions)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void testit( bool rehash_on_fail = true )
|
|
||||||
{
|
|
||||||
execution_space::fence();
|
|
||||||
|
|
||||||
uint32_t failed_count = 0;
|
|
||||||
do {
|
|
||||||
failed_count = 0;
|
|
||||||
Kokkos::parallel_reduce(inserts, *this, failed_count);
|
|
||||||
|
|
||||||
if (rehash_on_fail && failed_count > 0u) {
|
|
||||||
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + failed_count/collisions ;
|
|
||||||
map.rehash( new_capacity );
|
|
||||||
}
|
|
||||||
} while (rehash_on_fail && failed_count > 0u);
|
|
||||||
|
|
||||||
execution_space::fence();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void init( value_type & failed_count ) const { failed_count = 0; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void join( volatile value_type & failed_count, const volatile value_type & count ) const
|
|
||||||
{ failed_count += count; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(uint32_t i, value_type & failed_count) const
|
|
||||||
{
|
|
||||||
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
|
|
||||||
if (map.insert(key,i).failed()) ++failed_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename MapType, bool Near>
|
|
||||||
struct TestErase
|
|
||||||
{
|
|
||||||
typedef TestErase<MapType, Near> self_type;
|
|
||||||
|
|
||||||
typedef MapType map_type;
|
|
||||||
typedef typename MapType::execution_space execution_space;
|
|
||||||
|
|
||||||
map_type m_map;
|
|
||||||
uint32_t m_num_erase;
|
|
||||||
uint32_t m_num_duplicates;
|
|
||||||
|
|
||||||
TestErase(map_type map, uint32_t num_erases, uint32_t num_duplicates)
|
|
||||||
: m_map(map)
|
|
||||||
, m_num_erase(num_erases)
|
|
||||||
, m_num_duplicates(num_duplicates)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void testit()
|
|
||||||
{
|
|
||||||
execution_space::fence();
|
|
||||||
Kokkos::parallel_for(m_num_erase, *this);
|
|
||||||
execution_space::fence();
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(typename execution_space::size_type i) const
|
|
||||||
{
|
|
||||||
if (Near) {
|
|
||||||
m_map.erase(i/m_num_duplicates);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
m_map.erase(i%(m_num_erase/m_num_duplicates));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename MapType>
|
|
||||||
struct TestFind
|
|
||||||
{
|
|
||||||
typedef MapType map_type;
|
|
||||||
typedef typename MapType::execution_space::execution_space execution_space;
|
|
||||||
typedef uint32_t value_type;
|
|
||||||
|
|
||||||
map_type m_map;
|
|
||||||
uint32_t m_num_insert;
|
|
||||||
uint32_t m_num_duplicates;
|
|
||||||
uint32_t m_max_key;
|
|
||||||
|
|
||||||
TestFind(map_type map, uint32_t num_inserts, uint32_t num_duplicates)
|
|
||||||
: m_map(map)
|
|
||||||
, m_num_insert(num_inserts)
|
|
||||||
, m_num_duplicates(num_duplicates)
|
|
||||||
, m_max_key( ((num_inserts + num_duplicates) - 1)/num_duplicates )
|
|
||||||
{}
|
|
||||||
|
|
||||||
void testit(value_type &errors)
|
|
||||||
{
|
|
||||||
execution_space::execution_space::fence();
|
|
||||||
Kokkos::parallel_reduce(m_map.capacity(), *this, errors);
|
|
||||||
execution_space::execution_space::fence();
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void init( value_type & dst)
|
|
||||||
{
|
|
||||||
dst = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void join( volatile value_type & dst, const volatile value_type & src)
|
|
||||||
{ dst += src; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(typename execution_space::size_type i, value_type & errors) const
|
|
||||||
{
|
|
||||||
const bool expect_to_find_i = (i < m_max_key);
|
|
||||||
|
|
||||||
const bool exists = m_map.exists(i);
|
|
||||||
|
|
||||||
if (expect_to_find_i && !exists) ++errors;
|
|
||||||
if (!expect_to_find_i && exists) ++errors;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Impl
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
void test_insert( uint32_t num_nodes , uint32_t num_inserts , uint32_t num_duplicates , bool near )
|
|
||||||
{
|
|
||||||
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
|
|
||||||
typedef Kokkos::UnorderedMap<const uint32_t,const uint32_t, Device> const_map_type;
|
|
||||||
|
|
||||||
const uint32_t expected_inserts = (num_inserts + num_duplicates -1u) / num_duplicates;
|
|
||||||
|
|
||||||
map_type map;
|
|
||||||
map.rehash(num_nodes,false);
|
|
||||||
|
|
||||||
if (near) {
|
|
||||||
Impl::TestInsert<map_type,true> test_insert(map, num_inserts, num_duplicates);
|
|
||||||
test_insert.testit();
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
Impl::TestInsert<map_type,false> test_insert(map, num_inserts, num_duplicates);
|
|
||||||
test_insert.testit();
|
|
||||||
}
|
|
||||||
|
|
||||||
const bool print_list = false;
|
|
||||||
if (print_list) {
|
|
||||||
Kokkos::Impl::UnorderedMapPrint<map_type> f(map);
|
|
||||||
f.apply();
|
|
||||||
}
|
|
||||||
|
|
||||||
const uint32_t map_size = map.size();
|
|
||||||
|
|
||||||
ASSERT_FALSE( map.failed_insert());
|
|
||||||
{
|
|
||||||
EXPECT_EQ(expected_inserts, map_size);
|
|
||||||
|
|
||||||
{
|
|
||||||
uint32_t find_errors = 0;
|
|
||||||
Impl::TestFind<const_map_type> test_find(map, num_inserts, num_duplicates);
|
|
||||||
test_find.testit(find_errors);
|
|
||||||
EXPECT_EQ( 0u, find_errors);
|
|
||||||
}
|
|
||||||
|
|
||||||
map.begin_erase();
|
|
||||||
Impl::TestErase<map_type,false> test_erase(map, num_inserts, num_duplicates);
|
|
||||||
test_erase.testit();
|
|
||||||
map.end_erase();
|
|
||||||
EXPECT_EQ(0u, map.size());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
void test_failed_insert( uint32_t num_nodes)
|
|
||||||
{
|
|
||||||
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
|
|
||||||
|
|
||||||
map_type map(num_nodes);
|
|
||||||
Impl::TestInsert<map_type> test_insert(map, 2u*num_nodes, 1u);
|
|
||||||
test_insert.testit(false /*don't rehash on fail*/);
|
|
||||||
Device::execution_space::fence();
|
|
||||||
|
|
||||||
EXPECT_TRUE( map.failed_insert() );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Device>
|
|
||||||
void test_deep_copy( uint32_t num_nodes )
|
|
||||||
{
|
|
||||||
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
|
|
||||||
typedef Kokkos::UnorderedMap<const uint32_t, const uint32_t, Device> const_map_type;
|
|
||||||
|
|
||||||
typedef typename map_type::HostMirror host_map_type ;
|
|
||||||
// typedef Kokkos::UnorderedMap<uint32_t, uint32_t, typename Device::host_mirror_execution_space > host_map_type;
|
|
||||||
|
|
||||||
map_type map;
|
|
||||||
map.rehash(num_nodes,false);
|
|
||||||
|
|
||||||
{
|
|
||||||
Impl::TestInsert<map_type> test_insert(map, num_nodes, 1);
|
|
||||||
test_insert.testit();
|
|
||||||
ASSERT_EQ( map.size(), num_nodes);
|
|
||||||
ASSERT_FALSE( map.failed_insert() );
|
|
||||||
{
|
|
||||||
uint32_t find_errors = 0;
|
|
||||||
Impl::TestFind<map_type> test_find(map, num_nodes, 1);
|
|
||||||
test_find.testit(find_errors);
|
|
||||||
EXPECT_EQ( find_errors, 0u);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
host_map_type hmap;
|
|
||||||
Kokkos::deep_copy(hmap, map);
|
|
||||||
|
|
||||||
ASSERT_EQ( map.size(), hmap.size());
|
|
||||||
ASSERT_EQ( map.capacity(), hmap.capacity());
|
|
||||||
{
|
|
||||||
uint32_t find_errors = 0;
|
|
||||||
Impl::TestFind<host_map_type> test_find(hmap, num_nodes, 1);
|
|
||||||
test_find.testit(find_errors);
|
|
||||||
EXPECT_EQ( find_errors, 0u);
|
|
||||||
}
|
|
||||||
|
|
||||||
map_type mmap;
|
|
||||||
Kokkos::deep_copy(mmap, hmap);
|
|
||||||
|
|
||||||
const_map_type cmap = mmap;
|
|
||||||
|
|
||||||
EXPECT_EQ( cmap.size(), num_nodes);
|
|
||||||
|
|
||||||
{
|
|
||||||
uint32_t find_errors = 0;
|
|
||||||
Impl::TestFind<const_map_type> test_find(cmap, num_nodes, 1);
|
|
||||||
test_find.testit(find_errors);
|
|
||||||
EXPECT_EQ( find_errors, 0u);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP
|
|
||||||
@ -1,131 +0,0 @@
|
|||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
|
|
||||||
#ifndef KOKKOS_TEST_VECTOR_HPP
|
|
||||||
#define KOKKOS_TEST_VECTOR_HPP
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
namespace Impl {
|
|
||||||
|
|
||||||
template <typename Scalar, class Device>
|
|
||||||
struct test_vector_combinations
|
|
||||||
{
|
|
||||||
typedef test_vector_combinations<Scalar,Device> self_type;
|
|
||||||
|
|
||||||
typedef Scalar scalar_type;
|
|
||||||
typedef Device execution_space;
|
|
||||||
|
|
||||||
Scalar reference;
|
|
||||||
Scalar result;
|
|
||||||
|
|
||||||
template <typename Vector>
|
|
||||||
Scalar run_me(unsigned int n){
|
|
||||||
Vector a(n,1);
|
|
||||||
|
|
||||||
|
|
||||||
a.push_back(2);
|
|
||||||
a.resize(n+4);
|
|
||||||
a[n+1] = 3;
|
|
||||||
a[n+2] = 4;
|
|
||||||
a[n+3] = 5;
|
|
||||||
|
|
||||||
|
|
||||||
Scalar temp1 = a[2];
|
|
||||||
Scalar temp2 = a[n];
|
|
||||||
Scalar temp3 = a[n+1];
|
|
||||||
|
|
||||||
a.assign(n+2,-1);
|
|
||||||
|
|
||||||
a[2] = temp1;
|
|
||||||
a[n] = temp2;
|
|
||||||
a[n+1] = temp3;
|
|
||||||
|
|
||||||
Scalar test1 = 0;
|
|
||||||
for(unsigned int i=0; i<a.size(); i++)
|
|
||||||
test1+=a[i];
|
|
||||||
|
|
||||||
a.assign(n+1,-2);
|
|
||||||
Scalar test2 = 0;
|
|
||||||
for(unsigned int i=0; i<a.size(); i++)
|
|
||||||
test2+=a[i];
|
|
||||||
|
|
||||||
a.reserve(n+10);
|
|
||||||
|
|
||||||
Scalar test3 = 0;
|
|
||||||
for(unsigned int i=0; i<a.size(); i++)
|
|
||||||
test3+=a[i];
|
|
||||||
|
|
||||||
|
|
||||||
return (test1*test2+test3)*test2+test1*test3;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
test_vector_combinations(unsigned int size)
|
|
||||||
{
|
|
||||||
reference = run_me<std::vector<Scalar> >(size);
|
|
||||||
result = run_me<Kokkos::vector<Scalar,Device> >(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Impl
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Scalar, typename Device>
|
|
||||||
void test_vector_combinations(unsigned int size)
|
|
||||||
{
|
|
||||||
Impl::test_vector_combinations<Scalar,Device> test(size);
|
|
||||||
ASSERT_EQ( test.reference, test.result);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif //KOKKOS_TEST_VECTOR_HPP
|
|
||||||
@ -1,50 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
::testing::InitGoogleTest(&argc,argv);
|
|
||||||
return RUN_ALL_TESTS();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
|
|
||||||
|
|
||||||
TRIBITS_SUBPACKAGE(Core)
|
|
||||||
|
|
||||||
ADD_SUBDIRECTORY(src)
|
|
||||||
|
|
||||||
TRIBITS_ADD_TEST_DIRECTORIES(unit_test)
|
|
||||||
TRIBITS_ADD_TEST_DIRECTORIES(perf_test)
|
|
||||||
|
|
||||||
TRIBITS_SUBPACKAGE_POSTPROCESS()
|
|
||||||
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
|
|
||||||
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD
|
|
||||||
TEST_OPTIONAL_TPLS CUSPARSE
|
|
||||||
)
|
|
||||||
@ -1,50 +0,0 @@
|
|||||||
#ifndef KOKKOS_CORE_CONFIG_H
|
|
||||||
#define KOKKOS_CORE_CONFIG_H
|
|
||||||
|
|
||||||
/* The trivial 'src/build_common.sh' creates a config
|
|
||||||
* that must stay in sync with this file.
|
|
||||||
*/
|
|
||||||
#cmakedefine KOKKOS_FOR_SIERRA
|
|
||||||
|
|
||||||
#if !defined( KOKKOS_FOR_SIERRA )
|
|
||||||
|
|
||||||
#cmakedefine KOKKOS_HAVE_MPI
|
|
||||||
#cmakedefine KOKKOS_HAVE_CUDA
|
|
||||||
|
|
||||||
// mfh 16 Sep 2014: If passed in on the command line, that overrides
|
|
||||||
// any value of KOKKOS_USE_CUDA_UVM here. Doing this should prevent build
|
|
||||||
// warnings like this one:
|
|
||||||
//
|
|
||||||
// packages/kokkos/core/src/KokkosCore_config.h:13:1: warning: "KOKKOS_USE_CUDA_UVM" redefined
|
|
||||||
//
|
|
||||||
// At some point, we should edit the test-build scripts in
|
|
||||||
// Trilinos/cmake/ctest/drivers/perseus/, and take
|
|
||||||
// -DKOKKOS_USE_CUDA_UVM from the command-line arguments there. I
|
|
||||||
// hesitate to do that now, because I'm not sure if all the files are
|
|
||||||
// including KokkosCore_config.h (or a header file that includes it) like
|
|
||||||
// they should.
|
|
||||||
|
|
||||||
#if ! defined(KOKKOS_USE_CUDA_UVM)
|
|
||||||
#cmakedefine KOKKOS_USE_CUDA_UVM
|
|
||||||
#endif // ! defined(KOKKOS_USE_CUDA_UVM)
|
|
||||||
|
|
||||||
#cmakedefine KOKKOS_HAVE_PTHREAD
|
|
||||||
#cmakedefine KOKKOS_HAVE_SERIAL
|
|
||||||
#cmakedefine KOKKOS_HAVE_QTHREAD
|
|
||||||
#cmakedefine KOKKOS_HAVE_Winthread
|
|
||||||
#cmakedefine KOKKOS_HAVE_OPENMP
|
|
||||||
#cmakedefine KOKKOS_HAVE_HWLOC
|
|
||||||
#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
|
|
||||||
#cmakedefine KOKKOS_HAVE_CXX11
|
|
||||||
#cmakedefine KOKKOS_HAVE_CUSPARSE
|
|
||||||
#cmakedefine KOKKOS_ENABLE_PROFILING_COLLECT_KERNEL_DATA
|
|
||||||
#cmakedefine KOKKOS_ENABLE_PROFILING_AGGREGATE_MPI
|
|
||||||
|
|
||||||
// Don't forbid users from defining this macro on the command line,
|
|
||||||
// but still make sure that CMake logic can control its definition.
|
|
||||||
#if ! defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
|
|
||||||
#cmakedefine KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
|
|
||||||
#endif // KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
|
|
||||||
|
|
||||||
#endif // KOKKOS_FOR_SIERRA
|
|
||||||
#endif // KOKKOS_CORE_CONFIG_H
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
|
|
||||||
# Add this directory's build tree and source tree to the include path.
# (The variable was previously misspelled CMAKE_CURRENT_BINRARY_DIR, which
# expands to nothing.)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

# Sources of the performance-test executable.
SET(SOURCES
  PerfTestMain.cpp
  PerfTestHost.cpp
  PerfTestCuda.cpp
  )

TRIBITS_ADD_EXECUTABLE_AND_TEST(
  PerfTest
  SOURCES ${SOURCES}
  COMM serial mpi
  NUM_MPI_PROCS 1
  FAIL_REGULAR_EXPRESSION " FAILED "
  TESTONLYLIBS kokkos_gtest
  )
|
|
||||||
@ -1,66 +0,0 @@
|
|||||||
KOKKOS_PATH = ../..

GTEST_PATH = ../../tpls/gtest

vpath %.cpp ${KOKKOS_PATH}/core/perf_test

# Declared before the include so that it stays the default goal.
default: build_all
	echo "End Build"


include $(KOKKOS_PATH)/Makefile.kokkos

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
	CXX = $(NVCC_WRAPPER)
	CXXFLAGS ?= -O3
	LINK = $(CXX)
	LDFLAGS ?= -lpthread
else
	CXX ?= g++
	CXXFLAGS ?= -O3
	LINK ?= $(CXX)
	LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test

TEST_TARGETS =
TARGETS =

OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o
TARGETS += KokkosCore_PerformanceTest
TEST_TARGETS += test-performance

OBJ_ATOMICS = test_atomic.o
TARGETS += KokkosCore_PerformanceTest_Atomics
TEST_TARGETS += test-atomic


KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest

KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics

test-performance: KokkosCore_PerformanceTest
	./KokkosCore_PerformanceTest

test-atomic: KokkosCore_PerformanceTest_Atomics
	./KokkosCore_PerformanceTest_Atomics


build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# These targets only run commands and never produce a file of that name;
# marking them phony keeps a stray file called e.g. "test" or "clean" from
# making them look up to date.
.PHONY: default build_all test test-performance test-atomic clean

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
|
||||||
|
|
||||||
@ -1,309 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KOKKOS_BLAS_KERNELS_HPP
|
|
||||||
#define KOKKOS_BLAS_KERNELS_HPP
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
template< class ConstVectorType ,
|
|
||||||
class Device = typename ConstVectorType::execution_space >
|
|
||||||
struct Dot ;
|
|
||||||
|
|
||||||
template< class ConstVectorType ,
|
|
||||||
class Device = typename ConstVectorType::execution_space >
|
|
||||||
struct DotSingle ;
|
|
||||||
|
|
||||||
template< class ConstScalarType ,
|
|
||||||
class VectorType ,
|
|
||||||
class Device = typename VectorType::execution_space >
|
|
||||||
struct Scale ;
|
|
||||||
|
|
||||||
template< class ConstScalarType ,
|
|
||||||
class ConstVectorType ,
|
|
||||||
class VectorType ,
|
|
||||||
class Device = typename VectorType::execution_space >
|
|
||||||
struct AXPBY ;
|
|
||||||
|
|
||||||
/** \brief Y = alpha * X + beta * Y */
|
|
||||||
template< class ConstScalarType ,
|
|
||||||
class ConstVectorType ,
|
|
||||||
class VectorType >
|
|
||||||
void axpby( const ConstScalarType & alpha ,
|
|
||||||
const ConstVectorType & X ,
|
|
||||||
const ConstScalarType & beta ,
|
|
||||||
const VectorType & Y )
|
|
||||||
{
|
|
||||||
typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;
|
|
||||||
|
|
||||||
parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Y *= alpha */
|
|
||||||
template< class ConstScalarType ,
|
|
||||||
class VectorType >
|
|
||||||
void scale( const ConstScalarType & alpha , const VectorType & Y )
|
|
||||||
{
|
|
||||||
typedef Scale< ConstScalarType , VectorType > functor ;
|
|
||||||
|
|
||||||
parallel_for( Y.dimension_0() , functor( alpha , Y ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class ConstVectorType ,
|
|
||||||
class Finalize >
|
|
||||||
void dot( const ConstVectorType & X ,
|
|
||||||
const ConstVectorType & Y ,
|
|
||||||
const Finalize & finalize )
|
|
||||||
{
|
|
||||||
typedef Dot< ConstVectorType > functor ;
|
|
||||||
|
|
||||||
parallel_reduce( X.dimension_0() , functor( X , Y ) , finalize );
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class ConstVectorType ,
|
|
||||||
class Finalize >
|
|
||||||
void dot( const ConstVectorType & X ,
|
|
||||||
const Finalize & finalize )
|
|
||||||
{
|
|
||||||
typedef DotSingle< ConstVectorType > functor ;
|
|
||||||
|
|
||||||
parallel_reduce( X.dimension_0() , functor( X ) , finalize );
|
|
||||||
}
|
|
||||||
|
|
||||||
} /* namespace Kokkos */
|
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Kokkos {
|
|
||||||
|
|
||||||
template< class Type , class Device >
|
|
||||||
struct Dot
|
|
||||||
{
|
|
||||||
typedef typename Device::execution_space execution_space ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
|
||||||
Impl::unsigned_< Type::Rank > >::type ok_rank ;
|
|
||||||
|
|
||||||
|
|
||||||
/* typedef typename
|
|
||||||
Impl::StaticAssertSame< execution_space ,
|
|
||||||
typename Type::execution_space >::type ok_device ;*/
|
|
||||||
|
|
||||||
typedef double value_type ;
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
typename Type::const_type X ;
|
|
||||||
typename Type::const_type Y ;
|
|
||||||
#else
|
|
||||||
Type X ;
|
|
||||||
Type Y ;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
Dot( const Type & arg_x , const Type & arg_y )
|
|
||||||
: X(arg_x) , Y(arg_y) { }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( int i , value_type & update ) const
|
|
||||||
{ update += X[i] * Y[i]; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void join( volatile value_type & update ,
|
|
||||||
const volatile value_type & source )
|
|
||||||
{ update += source; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void init( value_type & update )
|
|
||||||
{ update = 0 ; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template< class Type , class Device >
|
|
||||||
struct DotSingle
|
|
||||||
{
|
|
||||||
typedef typename Device::execution_space execution_space ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
|
||||||
Impl::unsigned_< Type::Rank > >::type ok_rank ;
|
|
||||||
|
|
||||||
/* typedef typename
|
|
||||||
Impl::StaticAssertSame< execution_space ,
|
|
||||||
typename Type::execution_space >::type ok_device ;*/
|
|
||||||
|
|
||||||
typedef double value_type ;
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
typename Type::const_type X ;
|
|
||||||
#else
|
|
||||||
Type X ;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
DotSingle( const Type & arg_x ) : X(arg_x) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( int i , value_type & update ) const
|
|
||||||
{
|
|
||||||
const typename Type::value_type & x = X[i]; update += x * x ;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void join( volatile value_type & update ,
|
|
||||||
const volatile value_type & source )
|
|
||||||
{ update += source; }
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
static void init( value_type & update )
|
|
||||||
{ update = 0 ; }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template< class ScalarType , class VectorType , class Device>
|
|
||||||
struct Scale
|
|
||||||
{
|
|
||||||
typedef typename Device::execution_space execution_space ;
|
|
||||||
|
|
||||||
/* typedef typename
|
|
||||||
Impl::StaticAssertSame< execution_space ,
|
|
||||||
typename ScalarType::execution_space >::type
|
|
||||||
ok_scalar_device ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< execution_space ,
|
|
||||||
typename VectorType::execution_space >::type
|
|
||||||
ok_vector_device ;*/
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
|
|
||||||
Impl::unsigned_< ScalarType::Rank > >::type
|
|
||||||
ok_scalar_rank ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
|
||||||
Impl::unsigned_< VectorType::Rank > >::type
|
|
||||||
ok_vector_rank ;
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
typename ScalarType::const_type alpha ;
|
|
||||||
#else
|
|
||||||
ScalarType alpha ;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
VectorType Y ;
|
|
||||||
|
|
||||||
Scale( const ScalarType & arg_alpha , const VectorType & arg_Y )
|
|
||||||
: alpha( arg_alpha ), Y( arg_Y ) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( int i ) const
|
|
||||||
{
|
|
||||||
Y[i] *= alpha() ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template< class ScalarType ,
|
|
||||||
class ConstVectorType ,
|
|
||||||
class VectorType,
|
|
||||||
class Device>
|
|
||||||
struct AXPBY
|
|
||||||
{
|
|
||||||
typedef typename Device::execution_space execution_space ;
|
|
||||||
|
|
||||||
/* typedef typename
|
|
||||||
Impl::StaticAssertSame< execution_space ,
|
|
||||||
typename ScalarType::execution_space >::type
|
|
||||||
ok_scalar_device ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< execution_space ,
|
|
||||||
typename ConstVectorType::execution_space >::type
|
|
||||||
ok_const_vector_device ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< execution_space ,
|
|
||||||
typename VectorType::execution_space >::type
|
|
||||||
ok_vector_device ;*/
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
|
|
||||||
Impl::unsigned_< ScalarType::Rank > >::type
|
|
||||||
ok_scalar_rank ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
|
||||||
Impl::unsigned_< ConstVectorType::Rank > >::type
|
|
||||||
ok_const_vector_rank ;
|
|
||||||
|
|
||||||
typedef typename
|
|
||||||
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
|
||||||
Impl::unsigned_< VectorType::Rank > >::type
|
|
||||||
ok_vector_rank ;
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
typename ScalarType::const_type alpha , beta ;
|
|
||||||
typename ConstVectorType::const_type X ;
|
|
||||||
#else
|
|
||||||
ScalarType alpha , beta ;
|
|
||||||
ConstVectorType X ;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
VectorType Y ;
|
|
||||||
|
|
||||||
AXPBY( const ScalarType & arg_alpha ,
|
|
||||||
const ConstVectorType & arg_X ,
|
|
||||||
const ScalarType & arg_beta ,
|
|
||||||
const VectorType & arg_Y )
|
|
||||||
: alpha( arg_alpha ), beta( arg_beta ), X( arg_X ), Y( arg_Y ) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( int i ) const
|
|
||||||
{
|
|
||||||
Y[i] = alpha() * X[i] + beta() * Y[i] ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} /* namespace Kokkos */
|
|
||||||
|
|
||||||
#endif /* #ifndef KOKKOS_BLAS_KERNELS_HPP */
|
|
||||||
@ -1,189 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <iomanip>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
#if defined( KOKKOS_HAVE_CUDA )
|
|
||||||
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
|
|
||||||
#include <PerfTestHexGrad.hpp>
|
|
||||||
#include <PerfTestBlasKernels.hpp>
|
|
||||||
#include <PerfTestGramSchmidt.hpp>
|
|
||||||
#include <PerfTestDriver.hpp>
|
|
||||||
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
class cuda : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase() {
|
|
||||||
Kokkos::HostSpace::execution_space::initialize();
|
|
||||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
|
||||||
}
|
|
||||||
static void TearDownTestCase() {
|
|
||||||
Kokkos::Cuda::finalize();
|
|
||||||
Kokkos::HostSpace::execution_space::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Runs the HexGrad timing driver on Kokkos::Cuda with exponent range [10,20)
// and only checks that no exception escapes.
TEST_F( cuda, hexgrad )
{
  const int exp_beg = 10 ;
  const int exp_end = 20 ;

  EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( exp_beg , exp_end , "Kokkos::Cuda" ) );
}
|
|
||||||
|
|
||||||
// Runs the Gram-Schmidt timing driver on Kokkos::Cuda with exponent range
// [10,20) and only checks that no exception escapes.
TEST_F( cuda, gramschmidt )
{
  const int exp_beg = 10 ;
  const int exp_end = 20 ;

  EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( exp_beg , exp_end , "Kokkos::Cuda" ) );
}
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct TextureFetch
|
|
||||||
{
|
|
||||||
typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type;
|
|
||||||
typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type;
|
|
||||||
typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type;
|
|
||||||
typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type;
|
|
||||||
|
|
||||||
struct FillArray
|
|
||||||
{
|
|
||||||
array_type m_array;
|
|
||||||
FillArray( const array_type & array )
|
|
||||||
: m_array(array)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void apply() const
|
|
||||||
{
|
|
||||||
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(int i) const { m_array(i) = i; }
|
|
||||||
};
|
|
||||||
|
|
||||||
struct RandomIndexes
|
|
||||||
{
|
|
||||||
index_array_type m_indexes;
|
|
||||||
typename index_array_type::HostMirror m_host_indexes;
|
|
||||||
RandomIndexes( const index_array_type & indexes)
|
|
||||||
: m_indexes(indexes)
|
|
||||||
, m_host_indexes(Kokkos::create_mirror(m_indexes))
|
|
||||||
{}
|
|
||||||
|
|
||||||
void apply() const
|
|
||||||
{
|
|
||||||
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.dimension_0()), *this);
|
|
||||||
//random shuffle
|
|
||||||
Kokkos::HostSpace::execution_space::fence();
|
|
||||||
std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.dimension_0());
|
|
||||||
Kokkos::deep_copy(m_indexes,m_host_indexes);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(int i) const { m_host_indexes(i) = i; }
|
|
||||||
};
|
|
||||||
|
|
||||||
struct RandomReduce
|
|
||||||
{
|
|
||||||
const_array_type m_array;
|
|
||||||
const_index_array_type m_indexes;
|
|
||||||
RandomReduce( const const_array_type & array, const const_index_array_type & indexes)
|
|
||||||
: m_array(array)
|
|
||||||
, m_indexes(indexes)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void apply(T & reduce) const
|
|
||||||
{
|
|
||||||
Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this, reduce);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(int i, T & reduce) const
|
|
||||||
{ reduce += m_array(m_indexes(i)); }
|
|
||||||
};
|
|
||||||
|
|
||||||
static void run(int size, double & reduce_time, T &reduce)
|
|
||||||
{
|
|
||||||
array_type array("array",size);
|
|
||||||
index_array_type indexes("indexes",size);
|
|
||||||
|
|
||||||
{ FillArray f(array); f.apply(); }
|
|
||||||
{ RandomIndexes f(indexes); f.apply(); }
|
|
||||||
|
|
||||||
Kokkos::Cuda::fence();
|
|
||||||
|
|
||||||
Kokkos::Impl::Timer timer;
|
|
||||||
for (int j=0; j<10; ++j) {
|
|
||||||
RandomReduce f(array,indexes);
|
|
||||||
f.apply(reduce);
|
|
||||||
}
|
|
||||||
Kokkos::Cuda::fence();
|
|
||||||
reduce_time = timer.seconds();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // unnamed namespace
|
|
||||||
|
|
||||||
// Prints the TextureFetch<double> timing for array sizes 2^1 through 2^26.
// Nothing is asserted; the reduction result itself is discarded.
TEST_F( cuda, texture_double )
{
  printf("Random reduce of double through texture fetch\n");

  for (int exponent = 1; exponent <= 26; ++exponent) {
    const int entry_count = 1 << exponent;

    double elapsed = 0;
    double sum     = 0;
    TextureFetch<double>::run( entry_count , elapsed , sum );

    printf(" time = %1.3e size = 2^%d\n", elapsed, exponent);
  }
}
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
|
||||||
|
|
||||||
@ -1,152 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
// mfh 06 Jun 2013: This macro doesn't work like one might think it
|
|
||||||
// should. It doesn't take the template parameter DeviceType and
|
|
||||||
// print its actual type name; it just literally prints out
|
|
||||||
// "DeviceType". I've worked around this below without using the
|
|
||||||
// macro, so I'm commenting out the macro to avoid compiler complaints
|
|
||||||
// about an unused macro.
|
|
||||||
|
|
||||||
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
|
|
||||||
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
enum { NUMBER_OF_TRIALS = 5 };
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template< class DeviceType >
|
|
||||||
void run_test_hexgrad( int exp_beg , int exp_end, const char deviceTypeName[] )
|
|
||||||
{
|
|
||||||
std::string label_hexgrad ;
|
|
||||||
label_hexgrad.append( "\"HexGrad< double , " );
|
|
||||||
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
|
|
||||||
// the string, not the actual name of the device type. Thus, I've
|
|
||||||
// modified the function to take the name of the device type.
|
|
||||||
//
|
|
||||||
//label_hexgrad.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
|
|
||||||
label_hexgrad.append( deviceTypeName );
|
|
||||||
label_hexgrad.append( " >\"" );
|
|
||||||
|
|
||||||
for (int i = exp_beg ; i < exp_end ; ++i) {
|
|
||||||
double min_seconds = 0.0 ;
|
|
||||||
double max_seconds = 0.0 ;
|
|
||||||
double avg_seconds = 0.0 ;
|
|
||||||
|
|
||||||
const int parallel_work_length = 1<<i;
|
|
||||||
|
|
||||||
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
|
|
||||||
const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ;
|
|
||||||
|
|
||||||
if ( 0 == j ) {
|
|
||||||
min_seconds = seconds ;
|
|
||||||
max_seconds = seconds ;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if ( seconds < min_seconds ) min_seconds = seconds ;
|
|
||||||
if ( seconds > max_seconds ) max_seconds = seconds ;
|
|
||||||
}
|
|
||||||
avg_seconds += seconds ;
|
|
||||||
}
|
|
||||||
avg_seconds /= NUMBER_OF_TRIALS ;
|
|
||||||
|
|
||||||
std::cout << label_hexgrad
|
|
||||||
<< " , " << parallel_work_length
|
|
||||||
<< " , " << min_seconds
|
|
||||||
<< " , " << ( min_seconds / parallel_work_length )
|
|
||||||
<< std::endl ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template< class DeviceType >
|
|
||||||
void run_test_gramschmidt( int exp_beg , int exp_end, const char deviceTypeName[] )
|
|
||||||
{
|
|
||||||
std::string label_gramschmidt ;
|
|
||||||
label_gramschmidt.append( "\"GramSchmidt< double , " );
|
|
||||||
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
|
|
||||||
// the string, not the actual name of the device type. Thus, I've
|
|
||||||
// modified the function to take the name of the device type.
|
|
||||||
//
|
|
||||||
//label_gramschmidt.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
|
|
||||||
label_gramschmidt.append( deviceTypeName );
|
|
||||||
label_gramschmidt.append( " >\"" );
|
|
||||||
|
|
||||||
for (int i = exp_beg ; i < exp_end ; ++i) {
|
|
||||||
double min_seconds = 0.0 ;
|
|
||||||
double max_seconds = 0.0 ;
|
|
||||||
double avg_seconds = 0.0 ;
|
|
||||||
|
|
||||||
const int parallel_work_length = 1<<i;
|
|
||||||
|
|
||||||
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
|
|
||||||
const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ;
|
|
||||||
|
|
||||||
if ( 0 == j ) {
|
|
||||||
min_seconds = seconds ;
|
|
||||||
max_seconds = seconds ;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if ( seconds < min_seconds ) min_seconds = seconds ;
|
|
||||||
if ( seconds > max_seconds ) max_seconds = seconds ;
|
|
||||||
}
|
|
||||||
avg_seconds += seconds ;
|
|
||||||
}
|
|
||||||
avg_seconds /= NUMBER_OF_TRIALS ;
|
|
||||||
|
|
||||||
std::cout << label_gramschmidt
|
|
||||||
<< " , " << parallel_work_length
|
|
||||||
<< " , " << min_seconds
|
|
||||||
<< " , " << ( min_seconds / parallel_work_length )
|
|
||||||
<< std::endl ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,226 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <PerfTestBlasKernels.hpp>
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
//----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
// Reduction : result = dot( Q(:,j) , Q(:,j) );
|
|
||||||
// PostProcess : result = sqrt( result ) ; R(j,j) = result ; inv = ( 0 < result ) ? 1 / result : 0 ;
|
|
||||||
template< class VectorView , class ValueView >
|
|
||||||
struct InvNorm2 : public Kokkos::DotSingle< VectorView > {
|
|
||||||
|
|
||||||
typedef typename Kokkos::DotSingle< VectorView >::value_type value_type ;
|
|
||||||
|
|
||||||
ValueView Rjj ;
|
|
||||||
ValueView inv ;
|
|
||||||
|
|
||||||
InvNorm2( const VectorView & argX ,
|
|
||||||
const ValueView & argR ,
|
|
||||||
const ValueView & argInv )
|
|
||||||
: Kokkos::DotSingle< VectorView >( argX )
|
|
||||||
, Rjj( argR )
|
|
||||||
, inv( argInv )
|
|
||||||
{}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void final( value_type & result ) const
|
|
||||||
{
|
|
||||||
result = sqrt( result );
|
|
||||||
Rjj() = result ;
|
|
||||||
inv() = ( 0 < result ) ? 1.0 / result : 0 ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template< class VectorView , class ValueView >
|
|
||||||
inline
|
|
||||||
void invnorm2( const VectorView & x ,
|
|
||||||
const ValueView & r ,
|
|
||||||
const ValueView & r_inv )
|
|
||||||
{
|
|
||||||
Kokkos::parallel_reduce( x.dimension_0() , InvNorm2< VectorView , ValueView >( x , r , r_inv ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
// PostProcess : tmp = - ( R(j,k) = result );
|
|
||||||
template< class VectorView , class ValueView >
|
|
||||||
struct DotM : public Kokkos::Dot< VectorView > {
|
|
||||||
|
|
||||||
typedef typename Kokkos::Dot< VectorView >::value_type value_type ;
|
|
||||||
|
|
||||||
ValueView Rjk ;
|
|
||||||
ValueView tmp ;
|
|
||||||
|
|
||||||
DotM( const VectorView & argX ,
|
|
||||||
const VectorView & argY ,
|
|
||||||
const ValueView & argR ,
|
|
||||||
const ValueView & argTmp )
|
|
||||||
: Kokkos::Dot< VectorView >( argX , argY )
|
|
||||||
, Rjk( argR )
|
|
||||||
, tmp( argTmp )
|
|
||||||
{}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void final( value_type & result ) const
|
|
||||||
{
|
|
||||||
Rjk() = result ;
|
|
||||||
tmp() = - result ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template< class VectorView , class ValueView >
|
|
||||||
inline
|
|
||||||
void dot_neg( const VectorView & x ,
|
|
||||||
const VectorView & y ,
|
|
||||||
const ValueView & r ,
|
|
||||||
const ValueView & r_neg )
|
|
||||||
{
|
|
||||||
Kokkos::parallel_reduce( x.dimension_0() , DotM< VectorView , ValueView >( x , y , r , r_neg ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template< typename Scalar , class DeviceType >
|
|
||||||
struct ModifiedGramSchmidt
|
|
||||||
{
|
|
||||||
typedef DeviceType execution_space ;
|
|
||||||
typedef typename execution_space::size_type size_type ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< Scalar** ,
|
|
||||||
Kokkos::LayoutLeft ,
|
|
||||||
execution_space > multivector_type ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< Scalar* ,
|
|
||||||
Kokkos::LayoutLeft ,
|
|
||||||
execution_space > vector_type ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< Scalar ,
|
|
||||||
Kokkos::LayoutLeft ,
|
|
||||||
execution_space > value_view ;
|
|
||||||
|
|
||||||
|
|
||||||
multivector_type Q ;
|
|
||||||
multivector_type R ;
|
|
||||||
|
|
||||||
static double factorization( const multivector_type Q_ ,
|
|
||||||
const multivector_type R_ )
|
|
||||||
{
|
|
||||||
const size_type count = Q_.dimension_1();
|
|
||||||
value_view tmp("tmp");
|
|
||||||
value_view one("one");
|
|
||||||
|
|
||||||
Kokkos::deep_copy( one , (Scalar) 1 );
|
|
||||||
|
|
||||||
Kokkos::Impl::Timer timer ;
|
|
||||||
|
|
||||||
for ( size_type j = 0 ; j < count ; ++j ) {
|
|
||||||
// Reduction : tmp = dot( Q(:,j) , Q(:,j) );
|
|
||||||
// PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ;
|
|
||||||
const vector_type Qj = Kokkos::subview( Q_ , Kokkos::ALL() , j );
|
|
||||||
const value_view Rjj = Kokkos::subview( R_ , j , j );
|
|
||||||
|
|
||||||
invnorm2( Qj , Rjj , tmp );
|
|
||||||
|
|
||||||
// Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ;
|
|
||||||
Kokkos::scale( tmp , Qj );
|
|
||||||
|
|
||||||
for ( size_t k = j + 1 ; k < count ; ++k ) {
|
|
||||||
const vector_type Qk = Kokkos::subview( Q_ , Kokkos::ALL() , k );
|
|
||||||
const value_view Rjk = Kokkos::subview( R_ , j , k );
|
|
||||||
|
|
||||||
// Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) );
|
|
||||||
// PostProcess : tmp = - R(j,k);
|
|
||||||
dot_neg( Qj , Qk , Rjk , tmp );
|
|
||||||
|
|
||||||
// Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j)
|
|
||||||
Kokkos::axpby( tmp , Qj , one , Qk );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
execution_space::fence();
|
|
||||||
|
|
||||||
return timer.seconds();
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------------
|
|
||||||
|
|
||||||
static double test( const size_t length ,
|
|
||||||
const size_t count ,
|
|
||||||
const size_t iter = 1 )
|
|
||||||
{
|
|
||||||
multivector_type Q_( "Q" , length , count );
|
|
||||||
multivector_type R_( "R" , count , count );
|
|
||||||
|
|
||||||
typename multivector_type::HostMirror A =
|
|
||||||
Kokkos::create_mirror( Q_ );
|
|
||||||
|
|
||||||
// Create and fill A on the host
|
|
||||||
|
|
||||||
for ( size_type j = 0 ; j < count ; ++j ) {
|
|
||||||
for ( size_type i = 0 ; i < length ; ++i ) {
|
|
||||||
A(i,j) = ( i + 1 ) * ( j + 1 );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double dt_min = 0 ;
|
|
||||||
|
|
||||||
for ( size_t i = 0 ; i < iter ; ++i ) {
|
|
||||||
|
|
||||||
Kokkos::deep_copy( Q_ , A );
|
|
||||||
|
|
||||||
// A = Q * R
|
|
||||||
|
|
||||||
const double dt = factorization( Q_ , R_ );
|
|
||||||
|
|
||||||
if ( 0 == i ) dt_min = dt ;
|
|
||||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
|
||||||
}
|
|
||||||
|
|
||||||
return dt_min ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,268 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
template< class DeviceType ,
|
|
||||||
typename CoordScalarType = double ,
|
|
||||||
typename GradScalarType = float >
|
|
||||||
struct HexGrad
|
|
||||||
{
|
|
||||||
typedef DeviceType execution_space ;
|
|
||||||
typedef typename execution_space::size_type size_type ;
|
|
||||||
|
|
||||||
typedef HexGrad<DeviceType,CoordScalarType,GradScalarType> self_type;
|
|
||||||
|
|
||||||
// 3D array : ( ParallelWork , Space , Node )
|
|
||||||
|
|
||||||
enum { NSpace = 3 , NNode = 8 };
|
|
||||||
|
|
||||||
typedef Kokkos::View< CoordScalarType*[NSpace][NNode] , execution_space >
|
|
||||||
elem_coord_type ;
|
|
||||||
|
|
||||||
typedef Kokkos::View< GradScalarType*[NSpace][NNode] , execution_space >
|
|
||||||
elem_grad_type ;
|
|
||||||
|
|
||||||
elem_coord_type coords ;
|
|
||||||
elem_grad_type grad_op ;
|
|
||||||
|
|
||||||
enum { FLOPS = 318 }; // = 3 * ( 18 + 8 * 11 ) };
|
|
||||||
enum { READS = 18 };
|
|
||||||
enum { WRITES = 18 };
|
|
||||||
|
|
||||||
HexGrad( const elem_coord_type & arg_coords ,
|
|
||||||
const elem_grad_type & arg_grad_op )
|
|
||||||
: coords( arg_coords )
|
|
||||||
, grad_op( arg_grad_op )
|
|
||||||
{}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION static
|
|
||||||
void grad( const CoordScalarType x[] ,
|
|
||||||
const CoordScalarType z[] ,
|
|
||||||
GradScalarType grad_y[] )
|
|
||||||
{
|
|
||||||
const GradScalarType R42=(x[3] - x[1]);
|
|
||||||
const GradScalarType R52=(x[4] - x[1]);
|
|
||||||
const GradScalarType R54=(x[4] - x[3]);
|
|
||||||
|
|
||||||
const GradScalarType R63=(x[5] - x[2]);
|
|
||||||
const GradScalarType R83=(x[7] - x[2]);
|
|
||||||
const GradScalarType R86=(x[7] - x[5]);
|
|
||||||
|
|
||||||
const GradScalarType R31=(x[2] - x[0]);
|
|
||||||
const GradScalarType R61=(x[5] - x[0]);
|
|
||||||
const GradScalarType R74=(x[6] - x[3]);
|
|
||||||
|
|
||||||
const GradScalarType R72=(x[6] - x[1]);
|
|
||||||
const GradScalarType R75=(x[6] - x[4]);
|
|
||||||
const GradScalarType R81=(x[7] - x[0]);
|
|
||||||
|
|
||||||
const GradScalarType t1=(R63 + R54);
|
|
||||||
const GradScalarType t2=(R61 + R74);
|
|
||||||
const GradScalarType t3=(R72 + R81);
|
|
||||||
|
|
||||||
const GradScalarType t4 =(R86 + R42);
|
|
||||||
const GradScalarType t5 =(R83 + R52);
|
|
||||||
const GradScalarType t6 =(R75 + R31);
|
|
||||||
|
|
||||||
// Calculate Y gradient from X and Z data
|
|
||||||
|
|
||||||
grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
|
|
||||||
grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
|
|
||||||
grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
|
|
||||||
grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
|
|
||||||
grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
|
|
||||||
grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
|
|
||||||
grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
|
|
||||||
grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( size_type ielem ) const
|
|
||||||
{
|
|
||||||
GradScalarType g[NNode] ;
|
|
||||||
|
|
||||||
const CoordScalarType x[NNode] = {
|
|
||||||
coords(ielem,0,0),
|
|
||||||
coords(ielem,0,1),
|
|
||||||
coords(ielem,0,2),
|
|
||||||
coords(ielem,0,3),
|
|
||||||
coords(ielem,0,4),
|
|
||||||
coords(ielem,0,5),
|
|
||||||
coords(ielem,0,6),
|
|
||||||
coords(ielem,0,7)
|
|
||||||
};
|
|
||||||
|
|
||||||
const CoordScalarType y[NNode] = {
|
|
||||||
coords(ielem,1,0),
|
|
||||||
coords(ielem,1,1),
|
|
||||||
coords(ielem,1,2),
|
|
||||||
coords(ielem,1,3),
|
|
||||||
coords(ielem,1,4),
|
|
||||||
coords(ielem,1,5),
|
|
||||||
coords(ielem,1,6),
|
|
||||||
coords(ielem,1,7)
|
|
||||||
};
|
|
||||||
|
|
||||||
const CoordScalarType z[NNode] = {
|
|
||||||
coords(ielem,2,0),
|
|
||||||
coords(ielem,2,1),
|
|
||||||
coords(ielem,2,2),
|
|
||||||
coords(ielem,2,3),
|
|
||||||
coords(ielem,2,4),
|
|
||||||
coords(ielem,2,5),
|
|
||||||
coords(ielem,2,6),
|
|
||||||
coords(ielem,2,7)
|
|
||||||
};
|
|
||||||
|
|
||||||
grad( z , y , g );
|
|
||||||
|
|
||||||
grad_op(ielem,0,0) = g[0];
|
|
||||||
grad_op(ielem,0,1) = g[1];
|
|
||||||
grad_op(ielem,0,2) = g[2];
|
|
||||||
grad_op(ielem,0,3) = g[3];
|
|
||||||
grad_op(ielem,0,4) = g[4];
|
|
||||||
grad_op(ielem,0,5) = g[5];
|
|
||||||
grad_op(ielem,0,6) = g[6];
|
|
||||||
grad_op(ielem,0,7) = g[7];
|
|
||||||
|
|
||||||
grad( x , z , g );
|
|
||||||
|
|
||||||
grad_op(ielem,1,0) = g[0];
|
|
||||||
grad_op(ielem,1,1) = g[1];
|
|
||||||
grad_op(ielem,1,2) = g[2];
|
|
||||||
grad_op(ielem,1,3) = g[3];
|
|
||||||
grad_op(ielem,1,4) = g[4];
|
|
||||||
grad_op(ielem,1,5) = g[5];
|
|
||||||
grad_op(ielem,1,6) = g[6];
|
|
||||||
grad_op(ielem,1,7) = g[7];
|
|
||||||
|
|
||||||
grad( y , x , g );
|
|
||||||
|
|
||||||
grad_op(ielem,2,0) = g[0];
|
|
||||||
grad_op(ielem,2,1) = g[1];
|
|
||||||
grad_op(ielem,2,2) = g[2];
|
|
||||||
grad_op(ielem,2,3) = g[3];
|
|
||||||
grad_op(ielem,2,4) = g[4];
|
|
||||||
grad_op(ielem,2,5) = g[5];
|
|
||||||
grad_op(ielem,2,6) = g[6];
|
|
||||||
grad_op(ielem,2,7) = g[7];
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------------
|
|
||||||
|
|
||||||
struct Init {
|
|
||||||
typedef typename self_type::execution_space execution_space ;
|
|
||||||
|
|
||||||
elem_coord_type coords ;
|
|
||||||
|
|
||||||
Init( const elem_coord_type & arg_coords )
|
|
||||||
: coords( arg_coords ) {}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()( size_type ielem ) const
|
|
||||||
{
|
|
||||||
coords(ielem,0,0) = 0.;
|
|
||||||
coords(ielem,1,0) = 0.;
|
|
||||||
coords(ielem,2,0) = 0.;
|
|
||||||
|
|
||||||
coords(ielem,0,1) = 1.;
|
|
||||||
coords(ielem,1,1) = 0.;
|
|
||||||
coords(ielem,2,1) = 0.;
|
|
||||||
|
|
||||||
coords(ielem,0,2) = 1.;
|
|
||||||
coords(ielem,1,2) = 1.;
|
|
||||||
coords(ielem,2,2) = 0.;
|
|
||||||
|
|
||||||
coords(ielem,0,3) = 0.;
|
|
||||||
coords(ielem,1,3) = 1.;
|
|
||||||
coords(ielem,2,3) = 0.;
|
|
||||||
|
|
||||||
|
|
||||||
coords(ielem,0,4) = 0.;
|
|
||||||
coords(ielem,1,4) = 0.;
|
|
||||||
coords(ielem,2,4) = 1.;
|
|
||||||
|
|
||||||
coords(ielem,0,5) = 1.;
|
|
||||||
coords(ielem,1,5) = 0.;
|
|
||||||
coords(ielem,2,5) = 1.;
|
|
||||||
|
|
||||||
coords(ielem,0,6) = 1.;
|
|
||||||
coords(ielem,1,6) = 1.;
|
|
||||||
coords(ielem,2,6) = 1.;
|
|
||||||
|
|
||||||
coords(ielem,0,7) = 0.;
|
|
||||||
coords(ielem,1,7) = 1.;
|
|
||||||
coords(ielem,2,7) = 1.;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------------
|
|
||||||
|
|
||||||
static double test( const int count , const int iter = 1 )
|
|
||||||
{
|
|
||||||
elem_coord_type coord( "coord" , count );
|
|
||||||
elem_grad_type grad ( "grad" , count );
|
|
||||||
|
|
||||||
// Execute the parallel kernels on the arrays:
|
|
||||||
|
|
||||||
double dt_min = 0 ;
|
|
||||||
|
|
||||||
Kokkos::parallel_for( count , Init( coord ) );
|
|
||||||
execution_space::fence();
|
|
||||||
|
|
||||||
for ( int i = 0 ; i < iter ; ++i ) {
|
|
||||||
Kokkos::Impl::Timer timer ;
|
|
||||||
Kokkos::parallel_for( count , HexGrad<execution_space>( coord , grad ) );
|
|
||||||
execution_space::fence();
|
|
||||||
const double dt = timer.seconds();
|
|
||||||
if ( 0 == i ) dt_min = dt ;
|
|
||||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
|
||||||
}
|
|
||||||
|
|
||||||
return dt_min ;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,104 +0,0 @@
|
|||||||
/*
|
|
||||||
//@HEADER
|
|
||||||
// ************************************************************************
|
|
||||||
//
|
|
||||||
// Kokkos v. 2.0
|
|
||||||
// Copyright (2014) Sandia Corporation
|
|
||||||
//
|
|
||||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
||||||
// the U.S. Government retains certain rights in this software.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without
|
|
||||||
// modification, are permitted provided that the following conditions are
|
|
||||||
// met:
|
|
||||||
//
|
|
||||||
// 1. Redistributions of source code must retain the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
// notice, this list of conditions and the following disclaimer in the
|
|
||||||
// documentation and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// 3. Neither the name of the Corporation nor the names of the
|
|
||||||
// contributors may be used to endorse or promote products derived from
|
|
||||||
// this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
||||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
||||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
//
|
|
||||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
|
||||||
//
|
|
||||||
// ************************************************************************
|
|
||||||
//@HEADER
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
|
||||||
|
|
||||||
// Select the host execution space used by these tests.  The first enabled
// backend in the order OpenMP, Threads, Serial wins; compilation fails if
// none of them is enabled.
#if defined( KOKKOS_HAVE_OPENMP )

  typedef Kokkos::OpenMP TestHostDevice ;
  const char TestHostDeviceName[] = "Kokkos::OpenMP" ;

#elif defined( KOKKOS_HAVE_PTHREAD )

  typedef Kokkos::Threads TestHostDevice ;
  const char TestHostDeviceName[] = "Kokkos::Threads" ;

#elif defined( KOKKOS_HAVE_SERIAL )

  typedef Kokkos::Serial TestHostDevice ;
  const char TestHostDeviceName[] = "Kokkos::Serial" ;

#else
#  error "You must enable at least one of the following execution spaces in order to build this test: Kokkos::Threads, Kokkos::OpenMP, or Kokkos::Serial."
#endif
|
|
||||||
|
|
||||||
#include <impl/Kokkos_Timer.hpp>
|
|
||||||
|
|
||||||
#include <PerfTestHexGrad.hpp>
|
|
||||||
#include <PerfTestBlasKernels.hpp>
|
|
||||||
#include <PerfTestGramSchmidt.hpp>
|
|
||||||
#include <PerfTestDriver.hpp>
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------
|
|
||||||
|
|
||||||
namespace Test {
|
|
||||||
|
|
||||||
class host : public ::testing::Test {
|
|
||||||
protected:
|
|
||||||
static void SetUpTestCase()
|
|
||||||
{
|
|
||||||
const unsigned team_count = Kokkos::hwloc::get_available_numa_count();
|
|
||||||
const unsigned threads_per_team = 4 ;
|
|
||||||
|
|
||||||
TestHostDevice::initialize( team_count * threads_per_team );
|
|
||||||
}
|
|
||||||
|
|
||||||
static void TearDownTestCase()
|
|
||||||
{
|
|
||||||
TestHostDevice::finalize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST_F( host, hexgrad ) {
|
|
||||||
EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName ));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F( host, gramschmidt ) {
|
|
||||||
EXPECT_NO_THROW(run_test_gramschmidt< TestHostDevice>( 10, 20, TestHostDeviceName ));
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Test
|
|
||||||
|
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user