git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14370 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-12-15 22:26:37 +00:00
parent 31f22919ab
commit 06a217aa08
327 changed files with 0 additions and 95949 deletions

View File

@ -1,40 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -1,40 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -1,377 +0,0 @@
# Default settings common options
KOKKOS_PATH=../../lib/kokkos
#Options: OpenMP,Serial,Pthreads,Cuda
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8
KOKKOS_ARCH ?= ""
#Options: yes,no
KOKKOS_DEBUG ?= "no"
#Options: hwloc,librt
KOKKOS_USE_TPLS ?= ""
#Options: c++11
KOKKOS_CXX_STANDARD ?= "c++11"
#Options: kernel_times,aggregate_mpi
KOKKOS_PROFILING ?= ""
#Default settings specific options
#Options: force_uvm,use_ldg,rdc,enable_lambda
KOKKOS_CUDA_OPTIONS ?= ""
# Check for general settings
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
KOKKOS_INTERNAL_ENABLE_PROFILING_COLLECT_KERNEL_DATA := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "kernel_times" | wc -l))
KOKKOS_INTERNAL_ENABLE_PROFILING_AGGREGATE_MPI := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "aggregate_mpi" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
# Check for external libraries
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
# Check for advanced settings
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
# Check for Kokkos Host Execution Spaces one of which must be on
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l))
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
endif
KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)
KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l)
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# OpenMP is turned on by default in Cray compiler environment
KOKKOS_INTERNAL_OPENMP_FLAG :=
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_CXX11_FLAG := --c++11
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
else
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
endif
endif
endif
# Check for other Execution Spaces
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
# Check for Kokkos Architecture settings
#Intel based
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
#NVIDIA based
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
endif
#ARM based
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
#IBM based
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
#AMD based
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
#Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc ))
#Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
$(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
#Generating the list of Flags
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
# No warnings:
KOKKOS_CXXFLAGS =
# INTEL and CLANG warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
# GCC warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered
KOKKOS_LIBS = -lkokkos
KOKKOS_LDFLAGS = -L$(shell pwd)
KOKKOS_SRC =
KOKKOS_HEADERS =
#Generating the KokkosCore_config.h file
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
tmp := $(shell date >> KokkosCore_config.tmp)
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include
KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib
tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp )
endif
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -G
endif
KOKKOS_CXXFLAGS += -g
KOKKOS_LDFLAGS += -g -ldl
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
KOKKOS_LIBS += -lhwloc
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
KOKKOS_LIBS += -lrt
endif
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += --relocatable-device-code=true
KOKKOS_LDFLAGS += --relocatable-device-code=true
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -expt-extended-lambda
endif
#Add Architecture flags
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
KOKKOS_CXXFLAGS += -mavx
KOKKOS_LDFLAGS += -mavx
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
KOKKOS_CXXFLAGS += -mcpu=power8
KOKKOS_LDFLAGS += -mcpu=power8
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
KOKKOS_CXXFLAGS += -march=core-avx2
KOKKOS_LDFLAGS += -march=core-avx2
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
KOKKOS_CXXFLAGS += -mmic
KOKKOS_LDFLAGS += -mmic
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
KOKKOS_CXXFLAGS += -arch=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
KOKKOS_CXXFLAGS += -arch=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
KOKKOS_CXXFLAGS += -arch=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
KOKKOS_CXXFLAGS += -arch=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
KOKKOS_CXXFLAGS += -arch=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
KOKKOS_CXXFLAGS += -arch=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
KOKKOS_CXXFLAGS += -arch=sm_53
endif
endif
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
else
KOKKOS_INTERNAL_NEW_CONFIG := 1
endif
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
KOKKOS_LIBS += -lcudart -lcuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_LIBS += -lpthread
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
KOKKOS_LIBS += -lqthread
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
else
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
#With Cygwin functions such as fdopen and fileno are not defined
#when strict ansi is enabled. strict ansi gets enabled with --std=c++11
#though. So we hard undefine it here. Not sure if that has any bad side effects
#This is needed for gtest actually, not for Kokkos itself!
ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
endif
# Setting up dependencies
KokkosCore_config.h:
KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
include $(KOKKOS_PATH)/Makefile.targets
kokkos-clean:
rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
ranlib libkokkos.a
KOKKOS_LINK_DEPENDS=libkokkos.a

View File

@ -1,57 +0,0 @@
Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
Kokkos_AllocationTracker.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
Kokkos_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
endif

View File

@ -1,126 +0,0 @@
Kokkos implements a programming model in C++ for writing performance portable
applications targeting all major HPC platforms. For that purpose it provides
abstractions for both parallel execution of code and data management.
Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
OpenMP, Pthreads and CUDA as backend programming models.
The core developers of Kokkos are Carter Edwards and Christian Trott
at the Computer Science Research Institute of the Sandia National
Laboratories.
The KokkosP interface and associated tools are developed by the Application
Performance Team and Kokkos core developers at Sandia National Laboratories.
To learn more about Kokkos consider watching one of our presentations:
GTC 2015:
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
and feedback is greatly appreciated.
For questions please send an email to
kokkos-users@software.sandia.gov
For non-public questions send an email to
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
============================================================================
====Requirements============================================================
============================================================================
Primary tested compilers are:
GCC 4.7.2
GCC 4.8.4
GCC 4.9.2
GCC 5.1.0
Intel 14.0.4
Intel 15.0.2
Clang 3.5.2
Clang 3.6.1
Secondary tested compilers are:
CUDA 6.5 (with gcc 4.7.2)
CUDA 7.0 (with gcc 4.7.2)
Other compilers working:
PGI 15.4
IBM XL 13.1.2
Cygwin 2.1.0 64bit with gcc 4.9.3
Primary tested compiler are passing in release mode
with warnings as errors. We are using the following set
of flags:
GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Secondary compilers are passing without -Werror.
Other compilers are tested occasionally.
============================================================================
====Getting started=========================================================
============================================================================
In the 'example/tutorial' directory you will find step by step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
in the 'example/tutorial' directory. This will build all examples in the
subfolders.
============================================================================
====Running Unit Tests======================================================
============================================================================
To run the unit tests create a build directory and run the following commands
KOKKOS_PATH/generate_makefile.bash
make build-test
make test
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====Install the library=====================================================
============================================================================
To install Kokkos as a library create a build directory and run the following
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
make lib
make install
KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====CMakeFiles==============================================================
============================================================================
The CMake files contained in this repository require Tribits and are used
for integration with Trilinos. They do not currently support a standalone
CMake build.
===========================================================================
====Kokkos and CUDA UVM====================================================
===========================================================================
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
You can tell Kokkos to use that as the default space for Cuda allocations.
In either case UVM comes with a number of restrictions:
(i) You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
Furthermore in multi socket multi GPU machines, UVM defaults to using
zero copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.

File diff suppressed because it is too large Load Diff

View File

@ -1,496 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SORT_HPP_
#define KOKKOS_SORT_HPP_
#include <Kokkos_Core.hpp>
#include <algorithm>
namespace Kokkos {
namespace SortImpl {
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
struct CopyOp;
template<class ValuesViewType>
struct CopyOp<ValuesViewType,1> {
template<class DstType, class SrcType>
KOKKOS_INLINE_FUNCTION
static void copy(DstType& dst, size_t i_dst,
SrcType& src, size_t i_src ) {
dst(i_dst) = src(i_src);
}
};
template<class ValuesViewType>
struct CopyOp<ValuesViewType,2> {
template<class DstType, class SrcType>
KOKKOS_INLINE_FUNCTION
static void copy(DstType& dst, size_t i_dst,
SrcType& src, size_t i_src ) {
for(int j = 0;j< (int) dst.dimension_1(); j++)
dst(i_dst,j) = src(i_src,j);
}
};
template<class ValuesViewType>
struct CopyOp<ValuesViewType,3> {
template<class DstType, class SrcType>
KOKKOS_INLINE_FUNCTION
static void copy(DstType& dst, size_t i_dst,
SrcType& src, size_t i_src ) {
for(int j = 0; j<dst.dimension_1(); j++)
for(int k = 0; k<dst.dimension_2(); k++)
dst(i_dst,j,k) = src(i_src,j,k);
}
};
}
template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space,
class SizeType = typename KeyViewType::memory_space::size_type>
class BinSort {
public:
template<class ValuesViewType, class PermuteViewType, class CopyOp>
struct bin_sort_sort_functor {
typedef ExecutionSpace execution_space;
typedef typename ValuesViewType::non_const_type values_view_type;
typedef typename ValuesViewType::const_type const_values_view_type;
Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout,
typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values;
values_view_type sorted_values;
typename PermuteViewType::const_type sort_order;
bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_):
values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
//printf("Sort: %i %i\n",i,sort_order(i));
CopyOp::copy(sorted_values,i,values,sort_order(i));
}
};
typedef ExecutionSpace execution_space;
typedef BinSortOp bin_op_type;
struct bin_count_tag {};
struct bin_offset_tag {};
struct bin_binning_tag {};
struct bin_sort_bins_tag {};
public:
typedef SizeType size_type;
typedef size_type value_type;
typedef Kokkos::View<size_type*, execution_space> offset_type;
typedef Kokkos::View<const int*, execution_space> bin_count_type;
typedef Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout,
typename KeyViewType::memory_space> const_key_view_type;
typedef Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout,
typename KeyViewType::memory_space,
Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type;
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
typedef typename KeyViewType::const_value_type const_key_scalar;
private:
const_key_view_type keys;
const_rnd_key_view_type keys_rnd;
public:
BinSortOp bin_op;
offset_type bin_offsets;
Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic;
bin_count_type bin_count_const;
offset_type sort_order;
bool sort_within_bins;
public:
// Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
bool sort_within_bins_ = false)
:keys(keys_),keys_rnd(keys_), bin_op(bin_op_) {
bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
bin_count_const = bin_count_atomic;
bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
sort_order = offset_type("PermutationVector",keys.dimension_0());
sort_within_bins = sort_within_bins_;
}
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
void create_permute_vector() {
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this);
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
Kokkos::deep_copy(bin_count_atomic,0);
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this);
if(sort_within_bins)
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
}
// Sort a view with respect ot the first dimension using the permutation array
template<class ValuesViewType>
void sort(ValuesViewType values) {
ValuesViewType sorted_values = ValuesViewType("Copy",
values.dimension_0(),
values.dimension_1(),
values.dimension_2(),
values.dimension_3(),
values.dimension_4(),
values.dimension_5(),
values.dimension_6(),
values.dimension_7());
parallel_for(values.dimension_0(),
bin_sort_sort_functor<ValuesViewType, offset_type,
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
deep_copy(values,sorted_values);
}
// Get the permutation vector
KOKKOS_INLINE_FUNCTION
offset_type get_permute_vector() const { return sort_order;}
// Get the start offsets for each bin
KOKKOS_INLINE_FUNCTION
offset_type get_bin_offsets() const { return bin_offsets;}
// Get the count for each bin
KOKKOS_INLINE_FUNCTION
bin_count_type get_bin_count() const {return bin_count_const;}
public:
KOKKOS_INLINE_FUNCTION
void operator() (const bin_count_tag& tag, const int& i) const {
bin_count_atomic(bin_op.bin(keys,i))++;
}
KOKKOS_INLINE_FUNCTION
void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const {
if(final) {
bin_offsets(i) = offset;
}
offset+=bin_count_const(i);
}
KOKKOS_INLINE_FUNCTION
void operator() (const bin_binning_tag& tag, const int& i) const {
const int bin = bin_op.bin(keys,i);
const int count = bin_count_atomic(bin)++;
sort_order(bin_offsets(bin) + count) = i;
}
KOKKOS_INLINE_FUNCTION
void operator() (const bin_sort_bins_tag& tag, const int&i ) const {
bool sorted = false;
int upper_bound = bin_offsets(i)+bin_count_const(i);
while(!sorted) {
sorted = true;
int old_idx = sort_order(bin_offsets(i));
int new_idx;
for(int k=bin_offsets(i)+1; k<upper_bound; k++) {
new_idx = sort_order(k);
if(!bin_op(keys_rnd,old_idx,new_idx)) {
sort_order(k-1) = new_idx;
sort_order(k) = old_idx;
sorted = false;
} else {
old_idx = new_idx;
}
}
upper_bound--;
}
}
};
namespace SortImpl {
template<class KeyViewType>
struct DefaultBinOp1D {
const int max_bins_;
const double mul_;
typename KeyViewType::const_value_type range_;
typename KeyViewType::const_value_type min_;
//Construct BinOp with number of bins, minimum value and maxuimum value
DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
typename KeyViewType::const_value_type max )
:max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {}
//Determine bin index from key value
template<class ViewType>
KOKKOS_INLINE_FUNCTION
int bin(ViewType& keys, const int& i) const {
return int(mul_*(keys(i)-min_));
}
//Return maximum bin index + 1
KOKKOS_INLINE_FUNCTION
int max_bins() const {
return max_bins_;
}
//Compare to keys within a bin if true new_val will be put before old_val
template<class ViewType, typename iType1, typename iType2>
KOKKOS_INLINE_FUNCTION
bool operator()(ViewType& keys, iType1& i1, iType2& i2) const {
return keys(i1)<keys(i2);
}
};
template<class KeyViewType>
struct DefaultBinOp3D {
int max_bins_[3];
double mul_[3];
typename KeyViewType::non_const_value_type range_[3];
typename KeyViewType::non_const_value_type min_[3];
DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
typename KeyViewType::const_value_type max[] )
{
max_bins_[0] = max_bins__[0]+1;
max_bins_[1] = max_bins__[1]+1;
max_bins_[2] = max_bins__[2]+1;
mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]);
mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]);
mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]);
range_[0] = max[0]-min[0];
range_[1] = max[1]-min[1];
range_[2] = max[2]-min[2];
min_[0] = min[0];
min_[1] = min[1];
min_[2] = min[2];
}
template<class ViewType>
KOKKOS_INLINE_FUNCTION
int bin(ViewType& keys, const int& i) const {
return int( (((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) +
int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) +
int(mul_[2]*(keys(i,2)-min_[2])));
}
KOKKOS_INLINE_FUNCTION
int max_bins() const {
return max_bins_[0]*max_bins_[1]*max_bins_[2];
}
template<class ViewType, typename iType1, typename iType2>
KOKKOS_INLINE_FUNCTION
bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const {
if (keys(i1,0)>keys(i2,0)) return true;
else if (keys(i1,0)==keys(i2,0)) {
if (keys(i1,1)>keys(i2,1)) return true;
else if (keys(i1,1)==keys(i2,2)) {
if (keys(i1,2)>keys(i2,2)) return true;
}
}
return false;
}
};
template<typename Scalar>
struct min_max {
Scalar min;
Scalar max;
bool init;
KOKKOS_INLINE_FUNCTION
min_max() {
min = 0;
max = 0;
init = 0;
}
KOKKOS_INLINE_FUNCTION
min_max (const min_max& val) {
min = val.min;
max = val.max;
init = val.init;
}
KOKKOS_INLINE_FUNCTION
min_max operator = (const min_max& val) {
min = val.min;
max = val.max;
init = val.init;
return *this;
}
KOKKOS_INLINE_FUNCTION
void operator+= (const Scalar& val) {
if(init) {
min = min<val?min:val;
max = max>val?max:val;
} else {
min = val;
max = val;
init = 1;
}
}
KOKKOS_INLINE_FUNCTION
void operator+= (const min_max& val) {
if(init && val.init) {
min = min<val.min?min:val.min;
max = max>val.max?max:val.max;
} else {
if(val.init) {
min = val.min;
max = val.max;
init = 1;
}
}
}
KOKKOS_INLINE_FUNCTION
void operator+= (volatile const Scalar& val) volatile {
if(init) {
min = min<val?min:val;
max = max>val?max:val;
} else {
min = val;
max = val;
init = 1;
}
}
KOKKOS_INLINE_FUNCTION
void operator+= (volatile const min_max& val) volatile {
if(init && val.init) {
min = min<val.min?min:val.min;
max = max>val.max?max:val.max;
} else {
if(val.init) {
min = val.min;
max = val.max;
init = 1;
}
}
}
};
template<class ViewType>
struct min_max_functor {
typedef typename ViewType::execution_space execution_space;
ViewType view;
typedef min_max<typename ViewType::non_const_value_type> value_type;
min_max_functor (const ViewType view_):view(view_) {
}
KOKKOS_INLINE_FUNCTION
void operator()(const size_t& i, value_type& val) const {
val += view(i);
}
};
template<class ViewType>
bool try_std_sort(ViewType view) {
bool possible = true;
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
size_t stride[8];
view.stride(stride);
#else
size_t stride[8] = { view.stride_0()
, view.stride_1()
, view.stride_2()
, view.stride_3()
, view.stride_4()
, view.stride_5()
, view.stride_6()
, view.stride_7()
};
#endif
possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
possible = possible && (ViewType::Rank == 1);
possible = possible && (stride[0] == 1);
if(possible) {
std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0());
}
return possible;
}
}
template<class ViewType>
void sort(ViewType view, bool always_use_kokkos_sort = false) {
if(!always_use_kokkos_sort) {
if(SortImpl::try_std_sort(view)) return;
}
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
SortImpl::min_max<typename ViewType::non_const_value_type> val;
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
bin_sort.create_permute_vector();
bin_sort.sort(view);
}
/*template<class ViewType, class Comparator>
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
}*/
}
#endif

View File

@ -1,92 +0,0 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
default: build_all
echo "End Build"
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
TEST_TARGETS =
TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Serial
TEST_TARGETS += test-serial
endif
KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda
KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads
KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP
KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial
test-cuda: KokkosAlgorithms_UnitTest_Cuda
./KokkosAlgorithms_UnitTest_Cuda
test-threads: KokkosAlgorithms_UnitTest_Threads
./KokkosAlgorithms_UnitTest_Threads
test-openmp: KokkosAlgorithms_UnitTest_OpenMP
./KokkosAlgorithms_UnitTest_OpenMP
test-serial: KokkosAlgorithms_UnitTest_Serial
./KokkosAlgorithms_UnitTest_Serial
build_all: $(TARGETS)
test: $(TEST_TARGETS)
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,110 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdint.h>
#include <iostream>
#include <iomanip>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#ifdef KOKKOS_HAVE_CUDA
#include <TestRandom.hpp>
#include <TestSort.hpp>
namespace Test {
class cuda : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase()
{
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};
void cuda_test_random_xorshift64( int num_draws )
{
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws);
}
void cuda_test_random_xorshift1024( int num_draws )
{
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws);
}
#define CUDA_RANDOM_XORSHIFT64( num_draws ) \
TEST_F( cuda, Random_XorShift64 ) { \
cuda_test_random_xorshift64(num_draws); \
}
#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \
TEST_F( cuda, Random_XorShift1024 ) { \
cuda_test_random_xorshift1024(num_draws); \
}
#define CUDA_SORT_UNSIGNED( size ) \
TEST_F( cuda, SortUnsigned ) { \
Impl::test_sort< Kokkos::Cuda, unsigned >(size); \
}
CUDA_RANDOM_XORSHIFT64( 132141141 )
CUDA_RANDOM_XORSHIFT1024( 52428813 )
CUDA_SORT_UNSIGNED(171)
#undef CUDA_RANDOM_XORSHIFT64
#undef CUDA_RANDOM_XORSHIFT1024
#undef CUDA_SORT_UNSIGNED
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */

View File

@ -1,102 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
unsigned threads_count = omp_get_max_threads();
if ( Kokkos::hwloc::available() ) {
threads_count = Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa();
}
Kokkos::OpenMP::initialize( threads_count );
}
static void TearDownTestCase()
{
Kokkos::OpenMP::finalize();
}
};
#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \
TEST_F( openmp, Random_XorShift64 ) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >(num_draws); \
}
#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \
TEST_F( openmp, Random_XorShift1024 ) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >(num_draws); \
}
#define OPENMP_SORT_UNSIGNED( size ) \
TEST_F( openmp, SortUnsigned ) { \
Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \
}
OPENMP_RANDOM_XORSHIFT64( 10240000 )
OPENMP_RANDOM_XORSHIFT1024( 10130144 )
OPENMP_SORT_UNSIGNED(171)
#undef OPENMP_RANDOM_XORSHIFT64
#undef OPENMP_RANDOM_XORSHIFT1024
#undef OPENMP_SORT_UNSIGNED
#endif
} // namespace test

View File

@ -1,476 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_DUALVIEW_HPP
#define KOKKOS_TEST_DUALVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <cmath>
namespace Test {
namespace Impl{
// This test runs the random number generators and uses some statistic tests to
// check the 'goodness' of the random numbers:
// (i) mean: the mean is expected to be 0.5*RAND_MAX
// (ii) variance: the variance is 1/3*mean*mean
// (iii) covariance: the covariance is 0
// (iv) 1-tupledistr: the mean, variance and covariance of a 1D Histrogram of random numbers
// (v) 3-tupledistr: the mean, variance and covariance of a 3D Histrogram of random numbers
#define HIST_DIM3D 24
#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D)
struct RandomProperties {
uint64_t count;
double mean;
double variance;
double covariance;
double min;
double max;
KOKKOS_INLINE_FUNCTION
RandomProperties() {
count = 0;
mean = 0.0;
variance = 0.0;
covariance = 0.0;
min = 1e64;
max = -1e64;
}
KOKKOS_INLINE_FUNCTION
RandomProperties& operator+=(const RandomProperties& add) {
count += add.count;
mean += add.mean;
variance += add.variance;
covariance += add.covariance;
min = add.min<min?add.min:min;
max = add.max>max?add.max:max;
return *this;
}
KOKKOS_INLINE_FUNCTION
void operator+=(const volatile RandomProperties& add) volatile {
count += add.count;
mean += add.mean;
variance += add.variance;
covariance += add.covariance;
min = add.min<min?add.min:min;
max = add.max>max?add.max:max;
}
};
template<class GeneratorPool, class Scalar>
struct test_random_functor {
typedef typename GeneratorPool::generator_type rnd_type;
typedef RandomProperties value_type;
typedef typename GeneratorPool::device_type device_type;
GeneratorPool rand_pool;
const double mean;
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
// an exclusive upper bound on the range of random numbers that
// draw() can generate. However, for the float specialization, some
// implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each
// dimension, in the View types below.
typedef Kokkos::View<int[HIST_DIM1D+1],typename GeneratorPool::device_type> type_1d;
type_1d density_1d;
typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1],typename GeneratorPool::device_type> type_3d;
type_3d density_3d;
test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) :
rand_pool (rand_pool_),
mean (0.5*Kokkos::rand<rnd_type,Scalar>::max ()),
density_1d (d1d),
density_3d (d3d)
{}
KOKKOS_INLINE_FUNCTION
void operator() (int i, RandomProperties& prop) const {
using Kokkos::atomic_fetch_add;
rnd_type rand_gen = rand_pool.get_state();
for (int k = 0; k < 1024; ++k) {
const Scalar tmp = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
prop.count++;
prop.mean += tmp;
prop.variance += (tmp-mean)*(tmp-mean);
const Scalar tmp2 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
prop.count++;
prop.mean += tmp2;
prop.variance += (tmp2-mean)*(tmp2-mean);
prop.covariance += (tmp-mean)*(tmp2-mean);
const Scalar tmp3 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
prop.count++;
prop.mean += tmp3;
prop.variance += (tmp3-mean)*(tmp3-mean);
prop.covariance += (tmp2-mean)*(tmp3-mean);
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to
// define an exclusive upper bound on the range of random
// numbers that draw() can generate. However, for the float
// specialization, some implementations might violate this upper
// bound, due to rounding error. Just in case, we have left an
// extra space at the end of each dimension of density_1d and
// density_3d.
//
// Please note that those extra entries might not get counted in
// the histograms. However, if Kokkos::rand is broken and only
// returns values of max(), the histograms will still catch this
// indirectly, since none of the other values will be filled in.
const Scalar theMax = Kokkos::rand<rnd_type, Scalar>::max ();
const uint64_t ind1_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp / theMax);
const uint64_t ind2_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp2 / theMax);
const uint64_t ind3_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp3 / theMax);
const uint64_t ind1_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp / theMax);
const uint64_t ind2_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp2 / theMax);
const uint64_t ind3_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp3 / theMax);
atomic_fetch_add (&density_1d(ind1_1d), 1);
atomic_fetch_add (&density_1d(ind2_1d), 1);
atomic_fetch_add (&density_1d(ind3_1d), 1);
atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1);
}
rand_pool.free_state(rand_gen);
}
};
template<class DeviceType>
struct test_histogram1d_functor {
typedef RandomProperties value_type;
typedef typename DeviceType::execution_space execution_space;
typedef typename DeviceType::memory_space memory_space;
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
// an exclusive upper bound on the range of random numbers that
// draw() can generate. However, for the float specialization, some
// implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each
// dimension, in the View type below.
typedef Kokkos::View<int[HIST_DIM1D+1], memory_space> type_1d;
type_1d density_1d;
double mean;
test_histogram1d_functor (type_1d d1d, int num_draws) :
density_1d (d1d),
mean (1.0*num_draws/HIST_DIM1D*3)
{
printf ("Mean: %e\n", mean);
}
KOKKOS_INLINE_FUNCTION void
operator() (const typename memory_space::size_type i,
RandomProperties& prop) const
{
typedef typename memory_space::size_type size_type;
const double count = density_1d(i);
prop.mean += count;
prop.variance += 1.0 * (count - mean) * (count - mean);
//prop.covariance += 1.0*count*count;
prop.min = count < prop.min ? count : prop.min;
prop.max = count > prop.max ? count : prop.max;
if (i < static_cast<size_type> (HIST_DIM1D-1)) {
prop.covariance += (count - mean) * (density_1d(i+1) - mean);
}
}
};
template<class DeviceType>
struct test_histogram3d_functor {
typedef RandomProperties value_type;
typedef typename DeviceType::execution_space execution_space;
typedef typename DeviceType::memory_space memory_space;
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
// an exclusive upper bound on the range of random numbers that
// draw() can generate. However, for the float specialization, some
// implementations might violate this upper bound, due to rounding
// error. Just in case, we leave an extra space at the end of each
// dimension, in the View type below.
typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1], memory_space> type_3d;
type_3d density_3d;
double mean;
test_histogram3d_functor (type_3d d3d, int num_draws) :
density_3d (d3d),
mean (1.0*num_draws/HIST_DIM1D)
{}
KOKKOS_INLINE_FUNCTION void
operator() (const typename memory_space::size_type i,
RandomProperties& prop) const
{
typedef typename memory_space::size_type size_type;
const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D),
(i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
i % HIST_DIM3D);
prop.mean += count;
prop.variance += (count - mean) * (count - mean);
if (i < static_cast<size_type> (HIST_DIM1D-1)) {
const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D),
((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
(i+1)%HIST_DIM3D);
prop.covariance += (count - mean) * (count_next - mean);
}
}
};
//
// Templated test that uses the above functors.
//
template <class RandomGenerator,class Scalar>
struct test_random_scalar {
typedef typename RandomGenerator::generator_type rnd_type;
int pass_mean,pass_var,pass_covar;
int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar;
int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar;
test_random_scalar (typename test_random_functor<RandomGenerator,int>::type_1d& density_1d,
typename test_random_functor<RandomGenerator,int>::type_3d& density_3d,
RandomGenerator& pool,
unsigned int num_draws)
{
using std::cerr;
using std::endl;
using Kokkos::parallel_reduce;
{
cerr << " -- Testing randomness properties" << endl;
RandomProperties result;
typedef test_random_functor<RandomGenerator, Scalar> functor_type;
parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result);
//printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2);
double tolerance = 2.0*sqrt(1.0/num_draws);
double mean_expect = 0.5*Kokkos::rand<rnd_type,Scalar>::max();
double variance_expect = 1.0/3.0*mean_expect*mean_expect;
double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0;
double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0;
double covariance_eps = result.covariance/num_draws/2/variance_expect;
pass_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_var = ((-tolerance < variance_eps) &&
( tolerance > variance_eps)) ? 1:0;
pass_covar = ((-1.4*tolerance < covariance_eps) &&
( 1.4*tolerance > covariance_eps)) ? 1:0;
cerr << "Pass: " << pass_mean
<< " " << pass_var
<< " " << mean_eps
<< " " << variance_eps
<< " " << covariance_eps
<< " || " << tolerance << endl;
}
{
cerr << " -- Testing 1-D histogram" << endl;
RandomProperties result;
typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type;
parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result);
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
double mean_expect = 1.0*num_draws*3/HIST_DIM1D;
double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D;
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
pass_hist1d_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_hist1d_var = ((-tolerance < variance_eps) &&
( tolerance > variance_eps)) ? 1:0;
pass_hist1d_covar = ((-tolerance < covariance_eps) &&
( tolerance > covariance_eps)) ? 1:0;
cerr << "Density 1D: " << mean_eps
<< " " << variance_eps
<< " " << (result.covariance/HIST_DIM1D/HIST_DIM1D)
<< " || " << tolerance
<< " " << result.min
<< " " << result.max
<< " || " << result.variance/HIST_DIM1D
<< " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D)
<< " || " << result.covariance/HIST_DIM1D
<< " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D
<< endl;
}
{
cerr << " -- Testing 3-D histogram" << endl;
RandomProperties result;
typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type;
parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result);
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
double mean_expect = 1.0*num_draws/HIST_DIM1D;
double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D;
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
pass_hist3d_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_hist3d_var = ((-tolerance < variance_eps) &&
( tolerance > variance_eps)) ? 1:0;
pass_hist3d_covar = ((-tolerance < covariance_eps) &&
( tolerance > covariance_eps)) ? 1:0;
cerr << "Density 3D: " << mean_eps
<< " " << variance_eps
<< " " << result.covariance/HIST_DIM1D/HIST_DIM1D
<< " || " << tolerance
<< " " << result.min
<< " " << result.max << endl;
}
}
};
template <class RandomGenerator>
void test_random(unsigned int num_draws)
{
using std::cerr;
using std::endl;
typename test_random_functor<RandomGenerator,int>::type_1d density_1d("D1d");
typename test_random_functor<RandomGenerator,int>::type_3d density_3d("D3d");
cerr << "Test Scalar=int" << endl;
RandomGenerator pool(31891);
test_random_scalar<RandomGenerator,int> test_int(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_int.pass_mean,1);
ASSERT_EQ( test_int.pass_var,1);
ASSERT_EQ( test_int.pass_covar,1);
ASSERT_EQ( test_int.pass_hist1d_mean,1);
ASSERT_EQ( test_int.pass_hist1d_var,1);
ASSERT_EQ( test_int.pass_hist1d_covar,1);
ASSERT_EQ( test_int.pass_hist3d_mean,1);
ASSERT_EQ( test_int.pass_hist3d_var,1);
ASSERT_EQ( test_int.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=unsigned int" << endl;
test_random_scalar<RandomGenerator,unsigned int> test_uint(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_uint.pass_mean,1);
ASSERT_EQ( test_uint.pass_var,1);
ASSERT_EQ( test_uint.pass_covar,1);
ASSERT_EQ( test_uint.pass_hist1d_mean,1);
ASSERT_EQ( test_uint.pass_hist1d_var,1);
ASSERT_EQ( test_uint.pass_hist1d_covar,1);
ASSERT_EQ( test_uint.pass_hist3d_mean,1);
ASSERT_EQ( test_uint.pass_hist3d_var,1);
ASSERT_EQ( test_uint.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=int64_t" << endl;
test_random_scalar<RandomGenerator,int64_t> test_int64(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_int64.pass_mean,1);
ASSERT_EQ( test_int64.pass_var,1);
ASSERT_EQ( test_int64.pass_covar,1);
ASSERT_EQ( test_int64.pass_hist1d_mean,1);
ASSERT_EQ( test_int64.pass_hist1d_var,1);
ASSERT_EQ( test_int64.pass_hist1d_covar,1);
ASSERT_EQ( test_int64.pass_hist3d_mean,1);
ASSERT_EQ( test_int64.pass_hist3d_var,1);
ASSERT_EQ( test_int64.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=uint64_t" << endl;
test_random_scalar<RandomGenerator,uint64_t> test_uint64(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_uint64.pass_mean,1);
ASSERT_EQ( test_uint64.pass_var,1);
ASSERT_EQ( test_uint64.pass_covar,1);
ASSERT_EQ( test_uint64.pass_hist1d_mean,1);
ASSERT_EQ( test_uint64.pass_hist1d_var,1);
ASSERT_EQ( test_uint64.pass_hist1d_covar,1);
ASSERT_EQ( test_uint64.pass_hist3d_mean,1);
ASSERT_EQ( test_uint64.pass_hist3d_var,1);
ASSERT_EQ( test_uint64.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=float" << endl;
test_random_scalar<RandomGenerator,float> test_float(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_float.pass_mean,1);
ASSERT_EQ( test_float.pass_var,1);
ASSERT_EQ( test_float.pass_covar,1);
ASSERT_EQ( test_float.pass_hist1d_mean,1);
ASSERT_EQ( test_float.pass_hist1d_var,1);
ASSERT_EQ( test_float.pass_hist1d_covar,1);
ASSERT_EQ( test_float.pass_hist3d_mean,1);
ASSERT_EQ( test_float.pass_hist3d_var,1);
ASSERT_EQ( test_float.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=double" << endl;
test_random_scalar<RandomGenerator,double> test_double(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_double.pass_mean,1);
ASSERT_EQ( test_double.pass_var,1);
ASSERT_EQ( test_double.pass_covar,1);
ASSERT_EQ( test_double.pass_hist1d_mean,1);
ASSERT_EQ( test_double.pass_hist1d_var,1);
ASSERT_EQ( test_double.pass_hist1d_covar,1);
ASSERT_EQ( test_double.pass_hist3d_mean,1);
ASSERT_EQ( test_double.pass_hist3d_var,1);
ASSERT_EQ( test_double.pass_hist3d_covar,1);
}
}
} // namespace Test
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP

View File

@ -1,99 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
namespace Test {
#ifdef KOKKOS_HAVE_SERIAL
class serial : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision (5) << std::scientific;
Kokkos::Serial::initialize ();
}
static void TearDownTestCase ()
{
Kokkos::Serial::finalize ();
}
};
#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \
TEST_F( serial, Random_XorShift64 ) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Serial> >(num_draws); \
}
#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \
TEST_F( serial, Random_XorShift1024 ) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Serial> >(num_draws); \
}
#define SERIAL_SORT_UNSIGNED( size ) \
TEST_F( serial, SortUnsigned ) { \
Impl::test_sort< Kokkos::Serial, unsigned >(size); \
}
SERIAL_RANDOM_XORSHIFT64( 10240000 )
SERIAL_RANDOM_XORSHIFT1024( 10130144 )
SERIAL_SORT_UNSIGNED(171)
#undef SERIAL_RANDOM_XORSHIFT64
#undef SERIAL_RANDOM_XORSHIFT1024
#undef SERIAL_SORT_UNSIGNED
#endif // KOKKOS_HAVE_SERIAL
} // namespace Test

View File

@ -1,206 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef TESTSORT_HPP_
#define TESTSORT_HPP_
#include <gtest/gtest.h>
#include<Kokkos_Core.hpp>
#include<Kokkos_Random.hpp>
#include<Kokkos_Sort.hpp>
namespace Test {
namespace Impl{
template<class ExecutionSpace, class Scalar>
struct is_sorted_struct {
typedef unsigned int value_type;
typedef ExecutionSpace execution_space;
Kokkos::View<Scalar*,ExecutionSpace> keys;
is_sorted_struct(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {}
KOKKOS_INLINE_FUNCTION
void operator() (int i, unsigned int& count) const {
if(keys(i)>keys(i+1)) count++;
}
};
template<class ExecutionSpace, class Scalar>
struct sum {
typedef double value_type;
typedef ExecutionSpace execution_space;
Kokkos::View<Scalar*,ExecutionSpace> keys;
sum(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {}
KOKKOS_INLINE_FUNCTION
void operator() (int i, double& count) const {
count+=keys(i);
}
};
template<class ExecutionSpace, class Scalar>
struct bin3d_is_sorted_struct {
typedef unsigned int value_type;
typedef ExecutionSpace execution_space;
Kokkos::View<Scalar*[3],ExecutionSpace> keys;
int max_bins;
Scalar min;
Scalar max;
bin3d_is_sorted_struct(Kokkos::View<Scalar*[3],ExecutionSpace> keys_,int max_bins_,Scalar min_,Scalar max_):
keys(keys_),max_bins(max_bins_),min(min_),max(max_) {
}
KOKKOS_INLINE_FUNCTION
void operator() (int i, unsigned int& count) const {
int ix1 = int ((keys(i,0)-min)/max * max_bins);
int iy1 = int ((keys(i,1)-min)/max * max_bins);
int iz1 = int ((keys(i,2)-min)/max * max_bins);
int ix2 = int ((keys(i+1,0)-min)/max * max_bins);
int iy2 = int ((keys(i+1,1)-min)/max * max_bins);
int iz2 = int ((keys(i+1,2)-min)/max * max_bins);
if (ix1>ix2) count++;
else if(ix1==ix2) {
if (iy1>iy2) count++;
else if ((iy1==iy2) && (iz1>iz2)) count++;
}
}
};
template<class ExecutionSpace, class Scalar>
struct sum3D {
typedef double value_type;
typedef ExecutionSpace execution_space;
Kokkos::View<Scalar*[3],ExecutionSpace> keys;
sum3D(Kokkos::View<Scalar*[3],ExecutionSpace> keys_):keys(keys_) {}
KOKKOS_INLINE_FUNCTION
void operator() (int i, double& count) const {
count+=keys(i,0);
count+=keys(i,1);
count+=keys(i,2);
}
};
template<class ExecutionSpace, typename KeyType>
void test_1D_sort(unsigned int n,bool force_kokkos) {
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
KeyViewType keys("Keys",n);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_before);
Kokkos::sort(keys,force_kokkos);
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys),sort_fails);
double ratio = sum_before/sum_after;
double epsilon = 1e-10;
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
ASSERT_EQ(sort_fails,0);
ASSERT_EQ(equal_sum,1);
}
template<class ExecutionSpace, typename KeyType>
void test_3D_sort(unsigned int n) {
typedef Kokkos::View<KeyType*[3],ExecutionSpace > KeyViewType;
KeyViewType keys("Keys",n*n*n);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys,g,100.0);
double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
int bin_1d = 1;
while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2;
int bin_max[3] = {bin_1d,bin_1d,bin_1d};
typename KeyViewType::value_type min[3] = {0,0,0};
typename KeyViewType::value_type max[3] = {100,100,100};
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
BinOp bin_op(bin_max,min,max);
Kokkos::BinSort< KeyViewType , BinOp >
Sorter(keys,bin_op,false);
Sorter.create_permute_vector();
Sorter.template sort< KeyViewType >(keys);
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
double ratio = sum_before/sum_after;
double epsilon = 1e-10;
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails);
ASSERT_EQ(sort_fails,0);
ASSERT_EQ(equal_sum,1);
}
template<class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N)
{
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, true);
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, false);
test_3D_sort<ExecutionSpace,KeyType>(N);
}
}
}
#endif /* TESTSORT_HPP_ */

View File

@ -1,113 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
namespace Test {
#ifdef KOKKOS_HAVE_PTHREAD
class threads : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
unsigned num_threads = 4;
if (Kokkos::hwloc::available()) {
num_threads = Kokkos::hwloc::get_available_numa_count()
* Kokkos::hwloc::get_available_cores_per_numa()
// * Kokkos::hwloc::get_available_threads_per_core()
;
}
std::cout << "Threads: " << num_threads << std::endl;
Kokkos::Threads::initialize( num_threads );
}
static void TearDownTestCase()
{
Kokkos::Threads::finalize();
}
};
#define THREADS_RANDOM_XORSHIFT64( num_draws ) \
TEST_F( threads, Random_XorShift64 ) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Threads> >(num_draws); \
}
#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \
TEST_F( threads, Random_XorShift1024 ) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Threads> >(num_draws); \
}
#define THREADS_SORT_UNSIGNED( size ) \
TEST_F( threads, SortUnsigned ) { \
Impl::test_sort< Kokkos::Threads, double >(size); \
}
THREADS_RANDOM_XORSHIFT64( 10240000 )
THREADS_RANDOM_XORSHIFT1024( 10130144 )
THREADS_SORT_UNSIGNED(171)
#undef THREADS_RANDOM_XORSHIFT64
#undef THREADS_RANDOM_XORSHIFT1024
#undef THREADS_SORT_UNSIGNED
#endif
} // namespace Test

View File

@ -1,50 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
int main(int argc, char *argv[]) {
::testing::InitGoogleTest(&argc,argv);
return RUN_ALL_TESTS();
}

View File

@ -1,190 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build with Intel compiler
INTEL=ON
# Build for MIC architecture:
# INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2"
# Location for MPI to use in examples:
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuation only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuation:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,186 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build for MIC architecture:
INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2"
# Location for MPI to use in examples:
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuation only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuation:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,293 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
#-----------------------------------------------------------------------------
USE_CUDA_ARCH=
USE_THREAD=
USE_OPENMP=
USE_INTEL=
USE_XEON_PHI=
HWLOC_BASE_DIR=
MPI_BASE_DIR=
BLAS_LIB_DIR=
LAPACK_LIB_DIR=
if [ 1 ] ; then
# Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu
USE_CUDA_ARCH="35"
USE_OPENMP=ON
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
elif [ ] ; then
# Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu
USE_CUDA_ARCH="35"
USE_THREAD=ON
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
elif [ ] ; then
# Platform 'kokkos-dev' with Xeon Phi and hwloc
USE_OPENMP=ON
USE_INTEL=ON
USE_XEON_PHI=ON
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106"
elif [ ] ; then
# Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu
USE_CUDA_ARCH="20"
USE_OPENMP=ON
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
MPI_BASE_DIR="/home/sems/common/openmpi/current"
elif [ ] ; then
# Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu
USE_CUDA_ARCH="20"
USE_THREAD=ON
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
MPI_BASE_DIR="/home/sems/common/openmpi/current"
fi
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure command line options:
CMAKE_CONFIGURE=""
CMAKE_CXX_FLAGS=""
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
if [ 1 ] ; then
# Configure for Tpetra/Kokkos:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode"
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE"
if [ -n "${USE_CUDA_ARCH}" ] ; then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON"
fi
fi
if [ 1 ] ; then
# Configure for Stokhos:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON"
fi
if [ 1 ] ; then
# Configure for TrilinosCouplings:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON"
if [ "${CMAKE_BUILD_TYPE}" == "DEBUG" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
fi
#-----------------------------------------------------------------------------
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# MPI configuation only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Kokkos use pthread configuation:
if [ "${USE_THREAD}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Kokkos use OpenMP configuation:
if [ "${USE_OPENMP}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Hardware locality configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${USE_CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}"
if [ "${USE_OPENMP}" = "ON" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
fi
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${USE_INTEL}" = "ON" -o "${USE_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
# Cross-compile for Intel Xeon Phi:
if [ "${USE_XEON_PHI}" = "ON" ] ;
then
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
if [ -n "${CMAKE_CXX_FLAGS}" ] ; then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'"
fi
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,88 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
# to build:
# build on bgq-b[1-12]
# module load sierra-devel
# run this configure file
# make
# to run:
# ssh bgq-login
# cd /scratch/username/...
# export OMP_PROC_BIND and XLSMPOPTS environment variables
# run with srun
# Note: hwloc does not work to get or set cpubindings on bgq.
# Use the openmp backend and the openmp environment variables.
#
# Only the mpi wrappers seem to be setup for cross-compile,
# so it is important that this configure enables MPI and uses mpigcc wrappers.
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="../Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2"
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,216 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
#CMAKE_BUILD_TYPE=DEBUG
#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
#CUDA_ARCH=""
#CUDA_ARCH="20"
#CUDA_ARCH="30"
CUDA_ARCH="35"
# Build with OpenMP
OPENMP=ON
PTHREADS=ON
# Build host code with Intel compiler:
INTEL=OFF
# Build for MIC architecture:
INTEL_XEON_PHI=OFF
# Build with HWLOC at location:
#HWLOC_BASE_DIR=""
#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
# Location for MPI to use in examples:
#MPI_BASE_DIR=""
#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3"
#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/"
#-----------------------------------------------------------------------------
# MPI configuation only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuation:
if [ "${PTHREADS}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# OpenMP configuation:
if [ "${OPENMP}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
if [ "${OPENMP}" = "ON" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
fi
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,204 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
# CUDA_ARCH=""
CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build with OpenMP
OPENMP=ON
# Build host code with Intel compiler:
# INTEL=ON
# Build for MIC architecture:
# INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
# Location for MPI to use in examples:
MPI_BASE_DIR="/home/sems/common/openmpi/current"
#-----------------------------------------------------------------------------
# MPI configuation only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuation:
if [ "${OPENMP}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
if [ "${OPENMP}" = "ON" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
fi
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,190 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
# CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
CUDA_ARCH="35"
# Build host code with Intel compiler:
INTEL=ON
# Build for MIC architecture:
# INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2"
# Location for MPI to use in examples:
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuation only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuation:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,140 +0,0 @@
#!/bin/bash
#
# This script uses CUDA, OpenMP, and MPI.
#
# Before invoking this script, set the OMPI_CXX environment variable
# to point to nvcc_wrapper, wherever it happens to live. (If you use
# an MPI implementation other than OpenMPI, set the corresponding
# environment variable instead.)
#
rm -f CMakeCache.txt;
rm -rf CMakeFiles
EXTRA_ARGS=$@
MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5"
CUDA_PATH="/opt/nvidia/cuda/6.5.14"
#
# As long as there are any .cu files in Trilinos, we'll need to set
# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and
# lets nvcc_wrapper handle them as .cpp files, then we won't need to
# set CUDA_NVCC_FLAGS. As it is, given that we need to set
# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as
# nvcc_wrapper passes to nvcc.
#
CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM"
cmake \
-D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \
-D CMAKE_BUILD_TYPE:STRING=DEBUG \
-D CMAKE_CXX_FLAGS:STRING="-g -Wall" \
-D CMAKE_C_FLAGS:STRING="-g -Wall" \
-D CMAKE_FORTRAN_FLAGS:STRING="" \
-D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \
-D Trilinos_ENABLE_Triutils=OFF \
-D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \
-D Trilinos_ENABLE_DEBUG:BOOL=OFF \
-D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \
-D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \
-D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \
-D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \
-D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \
-D BUILD_SHARED_LIBS:BOOL=OFF \
-D DART_TESTING_TIMEOUT:STRING=600 \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
\
\
-D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
-D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
-D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
-D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
-D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \
-D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \
-D MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \
\
\
-D Trilinos_ENABLE_CXX11:BOOL=OFF \
-D TPL_ENABLE_MPI:BOOL=ON \
-D Trilinos_ENABLE_OpenMP:BOOL=ON \
-D Trilinos_ENABLE_ThreadPool:BOOL=ON \
\
\
-D TPL_ENABLE_CUDA:BOOL=ON \
-D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \
-D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \
-D TPL_ENABLE_Thrust:BOOL=OFF \
-D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \
-D TPL_ENABLE_CUSPARSE:BOOL=OFF \
-D TPL_ENABLE_Cusp:BOOL=OFF \
-D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \
-D CUDA_VERBOSE_BUILD:BOOL=OFF \
-D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS} \
\
\
-D TPL_ENABLE_HWLOC=OFF \
-D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \
-D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \
-D TPL_ENABLE_BinUtils=OFF \
-D TPL_ENABLE_BLAS:STRING=ON \
-D TPL_ENABLE_LAPACK:STRING=ON \
-D TPL_ENABLE_MKL:STRING=OFF \
-D TPL_ENABLE_HWLOC:STRING=OFF \
-D TPL_ENABLE_GTEST:STRING=ON \
-D TPL_ENABLE_SuperLU=ON \
-D TPL_ENABLE_BLAS=ON \
-D TPL_ENABLE_LAPACK=ON \
-D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \
-D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \
\
\
-D Trilinos_Enable_Kokkos:BOOL=ON \
-D Trilinos_ENABLE_KokkosCore:BOOL=ON \
-D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \
-D Trilinos_ENABLE_KokkosContainers:BOOL=ON \
-D Trilinos_ENABLE_TpetraKernels:BOOL=ON \
-D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \
-D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \
-D Trilinos_ENABLE_KokkosExample:BOOL=ON \
-D Kokkos_ENABLE_EXAMPLES:BOOL=ON \
-D Kokkos_ENABLE_TESTS:BOOL=OFF \
-D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \
-D TpetraClassic_ENABLE_OpenMPNode=OFF \
-D TpetraClassic_ENABLE_TPINode=OFF \
-D TpetraClassic_ENABLE_MKL=OFF \
-D Kokkos_ENABLE_Cuda_UVM=ON \
\
\
-D Trilinos_ENABLE_Teuchos:BOOL=ON \
-D Teuchos_ENABLE_COMPLEX:BOOL=OFF \
\
\
-D Trilinos_ENABLE_Tpetra:BOOL=ON \
-D Tpetra_ENABLE_KokkosCore=ON \
-D Tpetra_ENABLE_Kokkos_DistObject=OFF \
-D Tpetra_ENABLE_Kokkos_Refactor=ON \
-D Tpetra_ENABLE_TESTS=ON \
-D Tpetra_ENABLE_EXAMPLES=ON \
-D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \
\
\
-D Trilinos_ENABLE_Belos=OFF \
-D Trilinos_ENABLE_Amesos=OFF \
-D Trilinos_ENABLE_Amesos2=OFF \
-D Trilinos_ENABLE_Ifpack=OFF \
-D Trilinos_ENABLE_Ifpack2=OFF \
-D Trilinos_ENABLE_Epetra=OFF \
-D Trilinos_ENABLE_EpetraExt=OFF \
-D Trilinos_ENABLE_Zoltan=OFF \
-D Trilinos_ENABLE_Zoltan2=OFF \
-D Trilinos_ENABLE_MueLu=OFF \
-D Belos_ENABLE_TESTS=ON \
-D Belos_ENABLE_EXAMPLES=ON \
-D MueLu_ENABLE_TESTS=ON \
-D MueLu_ENABLE_EXAMPLES=ON \
-D Ifpack2_ENABLE_TESTS=ON \
-D Ifpack2_ENABLE_EXAMPLES=ON \
$EXTRA_ARGS \
${HOME}/Trilinos

View File

@ -1,113 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Cuda, OpenMP, Threads, Qthread, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/config/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# Qthread
QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,104 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Cuda, OpenMP, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/config/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF so tribits doesn't automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,88 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Cuda
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/config/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,84 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# C++11, OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF so tribits doesn't automatically activate
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,78 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# <none>
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,89 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Intel, OpenMP, Cuda
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,84 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Intel, OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 intel/13.SP1.1.106
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,77 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,87 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Threads, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,185 +0,0 @@
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building Trilinos with CUDA enabled. The script
# remedies some differences between the interface of NVCC and that of
# the host compiler, in particular for linking. It also means that
# Trilinos doesn't need separate .cu files; it can just use .cpp
# files.
#
# Hopefully, at some point, NVIDIA may fix NVCC so as to make this
# script obsolete. For now, this script exists and if you want to
# build Trilinos with CUDA enabled, you must use this script as your
# compiler.
# Default settings: change those according to your machine. For
# example, you may have have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overwritten
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
default_arch="sm_35"
#default_arch="sm_50"
#
# The default C++ compiler.
#
default_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
#default_compiler="icpc"
#default_compiler="/usr/local/gcc/4.8.3/bin/g++"
#default_compiler="/usr/local/gcc/4.9.1/bin/g++"
#
# Internal variables
#
cpp_files=""
xcompiler_args=""
cuda_arg=""
xlinker_args=""
object_files=""
object_files_xlinker=""
first_host_option=1
arch_set=0
ccbin_set=0
nvcc_error_code=0
dry_run=0
replace_pragma_ident=0
#echo "Arguments: $# $@"
while [ $# -gt 0 ]
do
case $1 in
#show the executed command
--show)
dry_run=1
;;
#replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
--replace-pragma-ident)
replace_pragma_ident=1
;;
#handle source files to be compiled as cuda files
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
cpp_files="$cpp_files $1"
;;
#Handle known nvcc args
-O*|-D*|-gencode*|-c|-I*|-L*|-l*|-g|--help|--version|--dryrun|--verbose|--keep-dir|-E|-M|-G|--relocatable-device-code*|-shared|-lineinfo|-expt-extended-lambda|--resource-usage)
cuda_args="$cuda_args $1"
;;
#Handle c++11 setting
--std=c++11|-std=c++11)
cuda_args="$cuda_args $1"
;;
#strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-std=c++98|--std=c++98)
;;
#Handle known nvcc args that have an argument
-o|-rdc|-maxrregcount|--default-stream)
cuda_args="$cuda_args $1 $2"
shift
;;
#strip of pedantic because it produces endless warnings about #LINE added by the preprocessor
-pedantic|-Wpedantic|-ansi)
;;
#strip -Xcompiler because we add it
-Xcompiler)
;;
#strip of "-x cu" because we add that
-x)
if [[ $2 != "cu" ]]; then
xcompiler_args="$xcompiler_args,-x,$2"
fi
shift
;;
#Handle -ccbin (if its not set we can set it to a default value)
-ccbin)
cuda_args="$cuda_args $1 $2"
ccbin_set=1
shift
;;
#Handle -arch argument (if its not set use a default
-arch*)
cuda_args="$cuda_args $1"
arch_set=1
;;
#Handle -Xcudafe argument
-Xcudafe)
cuda_args="$cuda_args -Xcudafe $2"
shift
;;
#Handle args that should be sent to the linker
-Wl*)
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
;;
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
*.a|*.so|*.o|*.obj)
object_files="$object_files $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
*.so.*|*.dylib)
object_files_xlinker="$object_files_xlinker -Xlinker $1"
object_files="$object_files -Xlinker $1"
;;
#All other args are sent to the host compiler
*)
if [ $first_host_option -eq 0 ]; then
xcompiler_args="$xcompiler_args,$1"
else
xcompiler_args="-Xcompiler $1"
first_host_option=0
fi
;;
esac
shift
done
#Add default host compiler if necessary
if [ $ccbin_set -ne 1 ]; then
cuda_args="$cuda_args -ccbin $default_compiler"
fi
#Add architecture command
if [ $arch_set -ne 1 ]; then
cuda_args="$cuda_args -arch=$default_arch"
fi
#Compose compilation command
command="nvcc $cuda_args $xlinker_args $xcompiler_args"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
if [ $replace_pragma_ident -eq 1 ]; then
cpp_files2=""
for file in $cpp_files
do
var=`grep pragma ${file} | grep ident | grep "#"`
if [ "${#var}" -gt 0 ]
then
sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > /tmp/nvcc_wrapper_tmp_$file
cpp_files2="$cpp_files2 /tmp/nvcc_wrapper_tmp_$file"
else
cpp_files2="$cpp_files2 $file"
fi
done
cpp_files=$cpp_files2
echo $cpp_files
fi
if [ "$cpp_files" ]; then
command="$command $object_files_xlinker -x cu $cpp_files"
else
command="$command $object_files"
fi
#Print command for dryrun
if [ $dry_run -eq 1 ]; then
echo $command
exit 0
fi
#Run compilation command
$command
nvcc_error_code=$?
#Report error code
exit $nvcc_error_code

View File

@ -1,279 +0,0 @@
#! /usr/bin/env python
"""
Snapshot a project into another project and perform the necessary repo actions
to provide a commit message that can be used to trace back to the exact point
in the source repository.
"""
#todo:
# Support svn
# Allow renaming of the source dir in the destination path
# Check if a new snapshot is necessary?
#
import sys
#check the version number so that there is a good error message when argparse is not available.
#This checks for exactly 2.7 which is bad, but it is a python 2 script and argparse was introduced
#in 2.7 which is also the last version of python 2. If this script is updated for python 3 this
#will need to change, but for now it is not safe to allow 3.x to run this.
if sys.version_info[:2] != (2, 7):
print "Error snapshot requires python 2.7 detected version is %d.%d." % (sys.version_info[0], sys.version_info[1])
sys.exit(1)
import subprocess, argparse, re, doctest, os, datetime, traceback
def parse_cmdline(description):
parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description)
parser.add_argument("-n", "--no-comit", action="store_false", dest="create_commit", default=True,
help="Do not perform a commit or create a commit message.")
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False,
help="Enable verbose mode.")
parser.add_argument("-d", "--debug", action="store_true", dest="debug_mode", default=False,
help="Enable debugging output.")
parser.add_argument("--no-validate-repo", action="store_true", dest="no_validate_repo", default=False,
help="Reduce the validation that the source and destination repos are clean to a warning.")
parser.add_argument("--source-repo", choices=["git","none"], default="",
help="Type of repository of the source, use none to skip all repository operations.")
parser.add_argument("--dest-repo", choices=["git","none"], default="",
help="Type of repository of the destination, use none to skip all repository operations.")
parser.add_argument("source", help="Source project to snapshot from.")
parser.add_argument("destination", help="Destination to snapshot too.")
options = parser.parse_args()
options = validate_options(options)
return options
#end parseCmdline
def validate_options(options):
apparent_source_repo_type="none"
apparent_dest_repo_type="none"
#prevent user from accidentally giving us a path that rsync will treat differently than expected.
options.source = options.source.rstrip(os.sep)
options.destination = options.destination.rstrip(os.sep)
options.source = os.path.abspath(options.source)
options.destination = os.path.abspath(options.destination)
if os.path.exists(options.source):
apparent_source_repo_type, source_root = deterimine_repo_type(options.source)
else:
raise RuntimeError("Could not find source directory of %s." % options.source)
options.source_root = source_root
if not os.path.exists(options.destination):
print "Could not find destination directory of %s so it will be created." % options.destination
os.makedirs(options.destination)
apparent_dest_repo_type, dest_root = deterimine_repo_type(options.destination)
options.dest_root = dest_root
#error on svn repo types for now
if apparent_source_repo_type == "svn" or apparent_dest_repo_type == "svn":
raise RuntimeError("SVN repositories are not supported at this time.")
if options.source_repo == "":
#source repo type is not specified to just using the apparent type.
options.source_repo = apparent_source_repo_type
else:
if options.source_repo != "none" and options.source_repo != apparent_source_repo_type:
raise RuntimeError("Specified source repository type of %s conflicts with determined type of %s" % \
(options.source_repo, apparent_source_repo_type))
if options.dest_repo == "":
#destination repo type is not specified to just using the apparent type.
options.dest_repo = apparent_dest_repo_type
else:
if options.dest_repo != "none" and options.dest_repo != apparent_dest_repo_type:
raise RuntimeError("Specified destination repository type of %s conflicts with determined type of %s" % \
(options.dest_repo, apparent_dest_repo_type))
return options
#end validate_options
def run_cmd(cmd, options, working_dir="."):
cmd_str = " ".join(cmd)
if options.verbose_mode:
print "Running command '%s' in dir %s." % (cmd_str, working_dir)
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir)
proc_stdout, proc_stderr = proc.communicate()
ret_val = proc.wait()
if options.debug_mode:
print "==== %s stdout start ====" % cmd_str
print proc_stdout
print "==== %s stdout end ====" % cmd_str
print "==== %s stderr ====" % cmd_str
print proc_stderr
print "==== %s stderr ====" % cmd_str
if ret_val != 0:
raise RuntimeError("Command '%s' failed with error code %d. Error message:%s%s%sstdout:%s" % \
(cmd_str, ret_val, os.linesep, proc_stderr, os.linesep, proc_stdout))
return proc_stdout, proc_stderr
#end run_cmd
def deterimine_repo_type(location):
apparent_repo_type = "none"
while location != "":
if os.path.exists(os.path.join(location, ".git")):
apparent_repo_type = "git"
break
elif os.path.exists(os.path.join(location, ".svn")):
apparent_repo_type = "svn"
break
else:
location = location[:location.rfind(os.sep)]
return apparent_repo_type, location
#end deterimine_repo_type
def rsync(source, dest, options):
rsync_cmd = ["rsync", "-ar", "--delete"]
if options.debug_mode:
rsync_cmd.append("-v")
if options.source_repo == "git":
rsync_cmd.append("--exclude=.git")
rsync_cmd.append(options.source)
rsync_cmd.append(options.destination)
run_cmd(rsync_cmd, options)
#end rsync
def create_commit_message(commit_id, commit_log, project_name, project_location):
eol = os.linesep
message = "Snapshot of %s from commit %s" % (project_name, commit_id)
message += eol * 2
message += "From repository at %s" % project_location
message += eol * 2
message += "At commit:" + eol
message += commit_log
return message
#end create_commit_message
def find_git_commit_information(options):
r"""
>>> class fake_options:
... source="."
... verbose_mode=False
... debug_mode=False
>>> myoptions = fake_options()
>>> find_git_commit_information(myoptions)[2:]
('sems', 'software.sandia.gov:/git/sems')
"""
git_log_cmd = ["git", "log", "-1"]
output, error = run_cmd(git_log_cmd, options, options.source)
commit_match = re.match("commit ([0-9a-fA-F]+)", output)
commit_id = commit_match.group(1)
commit_log = output
git_remote_cmd = ["git", "remote", "-v"]
output, error = run_cmd(git_remote_cmd, options, options.source)
remote_match = re.search("origin\s([^ ]*/([^ ]+))", output, re.MULTILINE)
if not remote_match:
raise RuntimeError("Could not find origin of repo at %s. Consider using none for source repo type." % (options.source))
source_location = remote_match.group(1)
source_name = remote_match.group(2).strip()
if source_name[-1] == "/":
source_name = source_name[:-1]
return commit_id, commit_log, source_name, source_location
#end find_git_commit_information
def do_git_commit(message, options):
if options.verbose_mode:
print "Commiting to destination repository."
git_add_cmd = ["git", "add", "-A"]
run_cmd(git_add_cmd, options, options.destination)
git_commit_cmd = ["git", "commit", "-m%s" % message]
run_cmd(git_commit_cmd, options, options.destination)
git_log_cmd = ["git", "log", "--format=%h", "-1"]
commit_sha1, error = run_cmd(git_log_cmd, options, options.destination)
print "Commit %s was made to %s." % (commit_sha1.strip(), options.dest_root)
#end do_git_commit
def verify_git_repo_clean(location, options):
git_status_cmd = ["git", "status", "--porcelain"]
output, error = run_cmd(git_status_cmd, options, location)
if output != "":
if options.no_validate_repo == False:
raise RuntimeError("%s is not clean.%sPlease commit or stash all changes before running snapshot."
% (location, os.linesep))
else:
print "WARNING: %s is not clean. Proceeding anyway." % location
print "WARNING: This could lead to differences in the source and destination."
print "WARNING: It could also lead to extra files being included in the snapshot commit."
#end verify_git_repo_clean
def main(options):
if options.verbose_mode:
print "Snapshotting %s to %s." % (options.source, options.destination)
if options.source_repo == "git":
verify_git_repo_clean(options.source, options)
commit_id, commit_log, repo_name, repo_location = find_git_commit_information(options)
elif options.source_repo == "none":
commit_id = "N/A"
commit_log = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now())
repo_name = options.source
repo_location = options.source
commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2
if options.dest_repo == "git":
verify_git_repo_clean(options.destination, options)
rsync(options.source, options.destination, options)
if options.dest_repo == "git":
do_git_commit(commit_message, options)
elif options.dest_repo == "none":
file_name = "snapshot_message.txt"
message_file = open(file_name, "w")
message_file.write(commit_message)
message_file.close()
cwd = os.getcwd()
print "No commit done by request. Please use file at:"
print "%s%sif you wish to commit this to a repo later." % (cwd+"/"+file_name, os.linesep)
#end main
if (__name__ == "__main__"):
if ("--test" in sys.argv):
doctest.testmod()
sys.exit(0)
try:
options = parse_cmdline(__doc__)
main(options)
except RuntimeError, e:
print "Error occured:", e
if "--debug" in sys.argv:
traceback.print_exc()
sys.exit(1)
else:
sys.exit(0)

View File

@ -1,305 +0,0 @@
#!/bin/bash -e
#
# Global config
#
set -o pipefail
COMPILER_ROOT="/home/projects/x86-64"
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS=""
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 gcc/4.7.2/base,hwloc/1.10.0/host/gnu/4.7.2 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.8.4 gcc/4.9.2/base,hwloc/1.10.0/host/gnu/4.9.2 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.2 gcc/4.9.2/base,hwloc/1.10.0/host/gnu/4.9.2 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 gcc/5.1.0/base,hwloc/1.10.0/host/gnu/5.1.0 $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/14.0.4 intel/14.0.4/base,hwloc/1.10.0/host/gnu/4.7.2 $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/15.0.2 intel/15.0.2/base,hwloc/1.10.0/host/gnu/4.7.2 $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.5.2 clang/3.5.2/base $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.6.1 clang/3.6.1/base $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/6.5.14 cuda/6.5.14,nvcc-wrapper/gnu,gcc/4.7.2/base $CUDA_BUILD_LIST nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.0.28 cuda/7.0.18,nvcc-wrapper/gnu,gcc/4.7.2/base $CUDA_BUILD_LIST nvcc_wrapper $CUDA_WARNING_FLAGS"
)
export OMP_NUM_THREADS=4
export SEMS_MODULE_ROOT=/projects/modulefiles
module use /home/projects/modulefiles
module use /projects/modulefiles/rhel6-x86_64/sems/compiler
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
#
# Handle arguments
#
DEBUG=False
ARGS=""
CUSTOM_BUILD_LIST=""
DRYRUN=False
while [[ $# > 0 ]]
do
key="$1"
case $key in
--kokkos-path*)
KOKKOS_PATH="${key#*=}"
;;
--build-list*)
CUSTOM_BUILD_LIST="${key#*=}"
;;
--debug*)
DEBUG=True
;;
--dry-run*)
DRYRUN=True
;;
--help)
echo "test_all_sandia <ARGS> <OPTIONS>:"
echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
echo " Defaults to root repo containing this script"
echo "--debug: Run tests in debug. Defaults to False"
echo "--dry-run: Just print what would be executed"
echo "--build-list=BUILD,BUILD,BUILD..."
echo " Provide a comma-separated list of builds instead of running all builds"
echo " Valid items:"
echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial"
echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
echo ""
echo "ARGS: list of expressions matching compilers to test"
echo ""
echo "Examples:"
echo " Run all tests"
echo " % test_all_sandia"
echo ""
echo " Run all gcc tests"
echo " % test_all_sandia gcc"
echo ""
echo " Run all gcc/4.7.2 and all intel tests"
echo " % test_all_sandia gcc/4.7.2 intel"
echo ""
echo " Run all tests in debug"
echo " % test_all_sandia --debug"
echo ""
echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds"
echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial"
echo
exit 0
;;
*)
# args, just append
ARGS="$ARGS $1"
;;
esac
shift
done
# set kokkos path
if [ -z "$KOKKOS_PATH" ]; then
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
# Ensure KOKKOS_PATH is abs path
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
fi
# set build type
if [ "$DEBUG" = "True" ]; then
BUILD_TYPE=debug
else
BUILD_TYPE=release
fi
# If no args provided, do all compilers
if [ -z "$ARGS" ]; then
ARGS='?'
fi
# Process args to figure out which compilers to test
COMPILERS_TO_TEST=""
for ARG in $ARGS; do
for COMPILER_DATA in "${COMPILERS[@]}"; do
arr=($COMPILER_DATA)
COMPILER=${arr[0]}
if [[ "$COMPILER" = $ARG* ]]; then
if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then
COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
else
echo "Tried to add $COMPILER twice"
fi
fi
done
done
#
# Functions
#
# Do not call directly
get_compiler_data() {
compiler=$1
item=$2
for compiler_data in "${COMPILERS[@]}" ; do
arr=($compiler_data)
if [ "$compiler" = "${arr[0]}" ]; then
echo "${arr[$item]}" | tr , ' '
return 0
fi
done
# Not found
echo "Unreconized compiler $compiler" >&2
exit 1
}
#
# For all getters, usage: <GETTER> <COMPILER>
#
get_compiler_modules() {
get_compiler_data $1 1
}
get_compiler_build_list() {
get_compiler_data $1 2
}
get_compiler_exe_name() {
get_compiler_data $1 3
}
get_compiler_warning_flags() {
get_compiler_data $1 4
}
run_cmd() {
echo "RUNNING: $*"
if [ "$DRYRUN" != "True" ]; then
eval "$*"
fi
}
report_and_log_test_result() {
if [ "$1" = "0" ]; then
echo "PASSED $2"
TEST_RESULTS="${TEST_RESULTS}\nPASSED $2"
else
echo "FAILED $2" >&2
TEST_RESULTS="${TEST_RESULTS}\nFAILED $2 ($3)"
NUM_FAILED+=1
fi
}
# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
single_build_and_test() {
# Use sane var names
local compiler=$1; local build=$2; local build_type=$3;
cd $ROOT_DIR/$compiler
local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
local compiler_exe=$(get_compiler_exe_name $compiler)
if [[ "$build_type" = hwloc* ]]; then
local extra_args="--with-hwloc=$HWLOC_ROOT"
fi
if [[ "$build_type" = *debug* ]]; then
local extra_args="$extra_args --debug"
local cxxflags="-g $compiler_warning_flags"
else
local cxxflags="-O3 $compiler_warning_flags"
fi
local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
echo " Doing build: $desc"
mkdir "${build}-$build_type"
cd "${build}-$build_type"
# cxxflags="-DKOKKOS_USING_EXPERIMENTAL_VIEW $cxxflags"
run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" \"$extra_args\" 2>&1 | tee ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
run_cmd make build-test 2>&1 | tee ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
run_cmd make test 2>&1 | tee ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; }
report_and_log_test_result 0 $desc
return 0
}
setup_env() {
local compiler=$1
local compiler_modules=$(get_compiler_modules $compiler)
module purge
for mod in $compiler_modules; do
module load $mod
# It is ridiculously hard to check for the success of a loaded
# module. Module does not return error codes and piping to grep
# causes module to run in a subshell.
module list 2>&1 | grep "$mod"
done
}
# build_and_test_all <COMPILER>
build_and_test_all() {
# Get compiler data
local compiler=$1
if [ -z "$CUSTOM_BUILD_LIST" ]; then
local compiler_build_list=$(get_compiler_build_list $compiler)
else
local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
fi
# set up env
cd $ROOT_DIR
mkdir -p $compiler
setup_env $compiler
# do builds
for build in $compiler_build_list
do
single_build_and_test $compiler $build $BUILD_TYPE
# If not cuda, do a hwloc test too
if [[ "$compiler" != cuda* ]]; then
single_build_and_test $compiler $build "hwloc-$BUILD_TYPE"
fi
done
return 0
}
#
# Main
#
/bin/rm -rf TestAll
mkdir TestAll
cd TestAll
TEST_RESULTS=""
declare -i NUM_FAILED=0
ROOT_DIR=$(pwd)
for COMPILER in $COMPILERS_TO_TEST; do
echo "Testing compiler $COMPILER"
build_and_test_all $COMPILER
done
echo "#######################################################"
echo "RESULT SUMMARY"
echo "#######################################################"
echo -e $TEST_RESULTS
exit $NUM_FAILED

View File

@ -1,5 +0,0 @@
jenkins_test_driver is designed to be run through Jenkins as a
multiconfiguration job. It relies on a number of environment variables that will
only be set when run in that context. It is possible to override these if you
know the Jenkins job setup. It is not recommended that a non-expert try to run
this script directly.

View File

@ -1,83 +0,0 @@
#!/bin/bash -x
echo "Building for BUILD_TYPE = ${BUILD_TYPE}"
echo "Building with HOST_COMPILER = ${HOST_COMPILER}"
echo "Building in ${WORKSPACE}"
module use /home/projects/modulefiles
BUILD_TYPE=`echo $BUILD_TYPE | tr "~" " "`
build_options=""
for item in ${BUILD_TYPE}; do
build_options="$build_options --with-$item"
done
kokkos_path=${WORKSPACE}/kokkos
gtest_path=${WORKSPACE}/kokkos/tpls/gtest
echo ${WORKSPACE}
pwd
#extract information from the provided parameters.
host_compiler_brand=`echo $HOST_COMPILER | grep -o "^[a-zA-Z]*"`
cuda_compiler=`echo $BUILD_TYPE | grep -o "cuda_[^ ]*"`
host_compiler_module=`echo $HOST_COMPILER | tr "_" "/"`
cuda_compiler_module=`echo $cuda_compiler | tr "_" "/"`
build_path=`echo $BUILD_TYPE | tr " " "_"`
module load $host_compiler_module
module load $cuda_compiler_module
case $host_compiler_brand in
gcc)
module load nvcc-wrapper/gnu
compiler=g++
;;
intel)
module load nvcc-wrapper/intel
compiler=icpc
;;
*)
echo "Unrecognized compiler brand."
exit 1
;;
esac
#if cuda is on we need to set the host compiler for the
#nvcc wrapper and make the wrapper the compiler.
if [ $cuda_compiler != "" ]; then
export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler
compiler=$kokkos_path/config/nvcc_wrapper
fi
if [ $host_compiler_brand == "intel" -a $cuda_compiler != "" ]; then
echo "Intel compilers are not supported with cuda at this time."
exit 0
fi
rm -rf test-$build_path
mkdir test-$build_path
cd test-$build_path
/bin/bash $kokkos_path/generate_makefile.bash $build_options --kokkos-path="$kokkos_path" --with-gtest="$gtest_path" --compiler=$compiler 2>&1 |tee configure.out
if [ ${PIPESTATUS[0]} != 0 ]; then
echo "Configure failed."
exit 1
fi
make build-test 2>&1 | tee build.log
if [ ${PIPESTATUS[0]} != 0 ]; then
echo "Build failed."
exit 1
fi
make test 2>&1 | tee test.log
grep "FAIL" test.log
if [ $? == 0 ]; then
echo "Tests failed."
exit 1
fi

View File

@ -1,81 +0,0 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests
default: build_all
echo "End Build"
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests
TEST_TARGETS =
TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_OpenMP
TEST_TARGETS += test-openmp
endif
KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda
KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads
KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP
test-cuda: KokkosContainers_PerformanceTest_Cuda
./KokkosContainers_PerformanceTest_Cuda
test-threads: KokkosContainers_PerformanceTest_Threads
./KokkosContainers_PerformanceTest_Threads
test-openmp: KokkosContainers_PerformanceTest_OpenMP
./KokkosContainers_PerformanceTest_OpenMP
build_all: $(TARGETS)
test: $(TEST_TARGETS)
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,100 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdint.h>
#include <string>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <fstream>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <Kokkos_UnorderedMap.hpp>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
namespace Performance {
class cuda : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase()
{
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};
TEST_F( cuda, global_2_local)
{
std::cout << "Cuda" << std::endl;
std::cout << "size, create, generate, fill, find" << std::endl;
for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step)
test_global_to_local_ids<Kokkos::Cuda>(i);
}
TEST_F( cuda, unordered_map_performance_near)
{
Perf::run_performance_tests<Kokkos::Cuda,true>("cuda-near");
}
TEST_F( cuda, unordered_map_performance_far)
{
Perf::run_performance_tests<Kokkos::Cuda,false>("cuda-far");
}
}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */

View File

@ -1,231 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <vector>
#include <algorithm>
#include <impl/Kokkos_Timer.hpp>
// This test will simulate global ids
namespace Performance {
static const unsigned begin_id_size = 256u;
static const unsigned end_id_size = 1u << 22;
static const unsigned id_step = 2u;
union helper
{
uint32_t word;
uint8_t byte[4];
};
template <typename Device>
struct generate_ids
{
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
local_id_view local_2_global;
generate_ids( local_id_view & ids)
: local_2_global(ids)
{
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const
{
helper x = {static_cast<uint32_t>(i)};
// shuffle the bytes of i to create a unique, semi-random global_id
x.word = ~x.word;
uint8_t tmp = x.byte[3];
x.byte[3] = x.byte[1];
x.byte[1] = tmp;
tmp = x.byte[2];
x.byte[2] = x.byte[0];
x.byte[0] = tmp;
local_2_global[i] = x.word;
}
};
template <typename Device>
struct fill_map
{
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
global_id_view global_2_local;
local_id_view local_2_global;
fill_map( global_id_view gIds, local_id_view lIds)
: global_2_local(gIds) , local_2_global(lIds)
{
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const
{
global_2_local.insert( local_2_global[i], i);
}
};
template <typename Device>
struct find_test
{
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view;
global_id_view global_2_local;
local_id_view local_2_global;
typedef size_t value_type;
find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
: global_2_local(gIds) , local_2_global(lIds)
{
Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors);
}
KOKKOS_INLINE_FUNCTION
void init(value_type & v) const
{ v = 0; }
KOKKOS_INLINE_FUNCTION
void join(volatile value_type & dst, volatile value_type const & src) const
{ dst += src; }
KOKKOS_INLINE_FUNCTION
void operator()(size_type i, value_type & num_errors) const
{
uint32_t index = global_2_local.find( local_2_global[i] );
if ( global_2_local.value_at(index) != i) ++num_errors;
}
};
template <typename Device>
void test_global_to_local_ids(unsigned num_ids)
{
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
//size
std::cout << num_ids << ", ";
double elasped_time = 0;
Kokkos::Impl::Timer timer;
local_id_view local_2_global("local_ids", num_ids);
global_id_view global_2_local((3u*num_ids)/2u);
//create
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
// generate unique ids
{
generate_ids<Device> gen(local_2_global);
}
Device::fence();
// generate
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
{
fill_map<Device> fill(global_2_local, local_2_global);
}
Device::fence();
// fill
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
size_t num_errors = 0;
for (int i=0; i<100; ++i)
{
find_test<Device> find(global_2_local, local_2_global,num_errors);
}
Device::fence();
// find
elasped_time = timer.seconds();
std::cout << elasped_time << std::endl;
ASSERT_EQ( num_errors, 0u);
}
} // namespace Performance
#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP

View File

@ -1,50 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
int main(int argc, char *argv[]) {
::testing::InitGoogleTest(&argc,argv);
return RUN_ALL_TESTS();
}

View File

@ -1,131 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
#include <iomanip>
#include <sstream>
#include <string>
#include <fstream>
namespace Performance {
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
unsigned num_threads = 4;
if (Kokkos::hwloc::available()) {
num_threads = Kokkos::hwloc::get_available_numa_count()
* Kokkos::hwloc::get_available_cores_per_numa()
* Kokkos::hwloc::get_available_threads_per_core()
;
}
std::cout << "OpenMP: " << num_threads << std::endl;
Kokkos::OpenMP::initialize( num_threads );
std::cout << "available threads: " << omp_get_max_threads() << std::endl;
}
static void TearDownTestCase()
{
Kokkos::OpenMP::finalize();
omp_set_num_threads(1);
ASSERT_EQ( 1 , omp_get_max_threads() );
}
};
TEST_F( openmp, global_2_local)
{
std::cout << "OpenMP" << std::endl;
std::cout << "size, create, generate, fill, find" << std::endl;
for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step)
test_global_to_local_ids<Kokkos::OpenMP>(i);
}
TEST_F( openmp, unordered_map_performance_near)
{
unsigned num_openmp = 4;
if (Kokkos::hwloc::available()) {
num_openmp = Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core();
}
std::ostringstream base_file_name;
base_file_name << "openmp-" << num_openmp << "-near";
Perf::run_performance_tests<Kokkos::OpenMP,true>(base_file_name.str());
}
TEST_F( openmp, unordered_map_performance_far)
{
unsigned num_openmp = 4;
if (Kokkos::hwloc::available()) {
num_openmp = Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core();
}
std::ostringstream base_file_name;
base_file_name << "openmp-" << num_openmp << "-far";
Perf::run_performance_tests<Kokkos::OpenMP,false>(base_file_name.str());
}
} // namespace test

View File

@ -1,126 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <iomanip>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
#include <iomanip>
#include <sstream>
#include <string>
#include <fstream>
namespace Performance {
class threads : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
unsigned num_threads = 4;
if (Kokkos::hwloc::available()) {
num_threads = Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core();
}
std::cout << "Threads: " << num_threads << std::endl;
Kokkos::Threads::initialize( num_threads );
}
static void TearDownTestCase()
{
Kokkos::Threads::finalize();
}
};
TEST_F( threads, global_2_local)
{
std::cout << "Threads" << std::endl;
std::cout << "size, create, generate, fill, find" << std::endl;
for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step)
test_global_to_local_ids<Kokkos::Threads>(i);
}
TEST_F( threads, unordered_map_performance_near)
{
unsigned num_threads = 4;
if (Kokkos::hwloc::available()) {
num_threads = Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core();
}
std::ostringstream base_file_name;
base_file_name << "threads-" << num_threads << "-near";
Perf::run_performance_tests<Kokkos::Threads,true>(base_file_name.str());
}
TEST_F( threads, unordered_map_performance_far)
{
unsigned num_threads = 4;
if (Kokkos::hwloc::available()) {
num_threads = Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core();
}
std::ostringstream base_file_name;
base_file_name << "threads-" << num_threads << "-far";
Perf::run_performance_tests<Kokkos::Threads,false>(base_file_name.str());
}
} // namespace Performance

View File

@ -1,262 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#include <impl/Kokkos_Timer.hpp>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <sstream>
namespace Perf {
template <typename Device, bool Near>
struct UnorderedMapTest
{
typedef Device execution_space;
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
typedef typename map_type::histogram_type histogram_type;
struct value_type {
uint32_t failed_count;
uint32_t max_list;
};
uint32_t capacity;
uint32_t inserts;
uint32_t collisions;
double seconds;
map_type map;
histogram_type histogram;
UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions)
: capacity(arg_capacity)
, inserts(arg_inserts)
, collisions(arg_collisions)
, seconds(0)
, map(capacity)
, histogram(map.get_histogram())
{
Kokkos::Impl::Timer wall_clock ;
wall_clock.reset();
value_type v = {};
int loop_count = 0;
do {
++loop_count;
v = value_type();
Kokkos::parallel_reduce(inserts, *this, v);
if (v.failed_count > 0u) {
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ;
map.rehash( new_capacity );
}
} while (v.failed_count > 0u);
seconds = wall_clock.seconds();
switch (loop_count)
{
case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break;
case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break;
default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break;
}
std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush;
histogram.calculate();
Device::fence();
}
void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out)
{
metrics_out << map.capacity() << " , ";
metrics_out << inserts/collisions << " , ";
metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , ";
metrics_out << inserts << " , ";
metrics_out << (map.failed_insert() ? "true" : "false") << " , ";
metrics_out << collisions << " , ";
metrics_out << 1e9*(seconds/inserts) << " , ";
metrics_out << seconds << std::endl;
length_out << map.capacity() << " , ";
length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
length_out << collisions << " , ";
histogram.print_length(length_out);
distance_out << map.capacity() << " , ";
distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
distance_out << collisions << " , ";
histogram.print_distance(distance_out);
block_distance_out << map.capacity() << " , ";
block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
block_distance_out << collisions << " , ";
histogram.print_block_distance(block_distance_out);
}
KOKKOS_INLINE_FUNCTION
void init( value_type & v ) const
{
v.failed_count = 0;
v.max_list = 0;
}
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & dst, const volatile value_type & src ) const
{
dst.failed_count += src.failed_count;
dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list;
}
KOKKOS_INLINE_FUNCTION
void operator()(uint32_t i, value_type & v) const
{
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
typename map_type::insert_result result = map.insert(key,i);
v.failed_count += !result.failed() ? 0 : 1;
v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position();
}
};
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
std::string length_file_name = base_file_name + std::string("-length.csv");
std::string distance_file_name = base_file_name + std::string("-distance.csv");
std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv");
std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out );
std::ofstream length_out( length_file_name.c_str(), std::ofstream::out );
std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out );
std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out );
/*
const double test_ratios[] = {
0.50
, 0.75
, 0.80
, 0.85
, 0.90
, 0.95
, 1.00
, 1.25
, 2.00
};
*/
const double test_ratios[] = { 1.00 };
const int num_ratios = sizeof(test_ratios) / sizeof(double);
/*
const uint32_t collisions[] {
1
, 4
, 16
, 64
};
*/
const uint32_t collisions[] = { 16 };
const int num_collisions = sizeof(collisions) / sizeof(uint32_t);
// set up file headers
metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl;
length_out << "Capacity , Percent Full , ";
distance_out << "Capacity , Percent Full , ";
block_distance_out << "Capacity , Percent Full , ";
for (int i=0; i<100; ++i) {
length_out << i << " , ";
distance_out << i << " , ";
block_distance_out << i << " , ";
}
length_out << "\b\b\b " << std::endl;
distance_out << "\b\b\b " << std::endl;
block_distance_out << "\b\b\b " << std::endl;
Kokkos::Impl::Timer wall_clock ;
for (int i=0; i < num_collisions ; ++i) {
wall_clock.reset();
std::cout << "Collisions: " << collisions[i] << std::endl;
for (int j = 0; j < num_ratios; ++j) {
std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "% " << std::flush;
for (uint32_t capacity = 1<<14; capacity < 1<<25; capacity = capacity << 1) {
uint32_t inserts = static_cast<uint32_t>(test_ratios[j]*(capacity));
std::cout << capacity << std::flush;
UnorderedMapTest<Device, Near> test(capacity, inserts*collisions[i], collisions[i]);
Device::fence();
test.print(metrics_out, length_out, distance_out, block_distance_out);
}
std::cout << "\b\b " << std::endl;
}
std::cout << " " << wall_clock.seconds() << " secs" << std::endl;
}
metrics_out.close();
length_out.close();
distance_out.close();
block_distance_out.close();
#else
(void)base_file_name;
std::cout << "skipping test" << std::endl;
#endif
}
} // namespace Perf
#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP

View File

@ -1,437 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITSET_HPP
#define KOKKOS_BITSET_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>
#include <impl/Kokkos_Bitset_impl.hpp>
#include <stdexcept>
namespace Kokkos {
template <typename Device = Kokkos::DefaultExecutionSpace >
class Bitset;
template <typename Device = Kokkos::DefaultExecutionSpace >
class ConstBitset;
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
/// A thread safe view to a bitset
template <typename Device>
class Bitset
{
public:
typedef Device execution_space;
typedef unsigned size_type;
enum { BIT_SCAN_REVERSE = 1u };
enum { MOVE_HINT_BACKWARD = 2u };
enum {
BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u
, BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE
, BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD
, BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
};
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
enum { block_mask = block_size-1u };
enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };
public:
/// constructor
/// arg_size := number of bit in set
Bitset(unsigned arg_size = 0u)
: m_size(arg_size)
, m_last_block_mask(0u)
, m_blocks("Bitset", ((m_size + block_mask) >> block_shift) )
{
for (int i=0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
m_last_block_mask |= 1u << i;
}
}
/// assignment
Bitset<Device> & operator = (Bitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_last_block_mask = rhs.m_last_block_mask;
this->m_blocks = rhs.m_blocks;
return *this;
}
/// copy constructor
Bitset( Bitset<Device> const & rhs)
: m_size( rhs.m_size )
, m_last_block_mask( rhs.m_last_block_mask )
, m_blocks( rhs.m_blocks )
{}
/// number of bits in the set
/// can be call from the host or the device
KOKKOS_FORCEINLINE_FUNCTION
unsigned size() const
{ return m_size; }
/// number of bits which are set to 1
/// can only be called from the host
unsigned count() const
{
Impl::BitsetCount< Bitset<Device> > f(*this);
return f.apply();
}
/// set all bits to 1
/// can only be called from the host
void set()
{
Kokkos::deep_copy(m_blocks, ~0u );
if (m_last_block_mask) {
//clear the unused bits in the last block
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
}
}
/// set all bits to 0
/// can only be called from the host
void reset()
{
Kokkos::deep_copy(m_blocks, 0u );
}
/// set all bits to 0
/// can only be called from the host
void clear()
{
Kokkos::deep_copy(m_blocks, 0u );
}
/// set i'th bit to 1
/// can only be called from the device
KOKKOS_FORCEINLINE_FUNCTION
bool set( unsigned i ) const
{
if ( i < m_size ) {
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return !( atomic_fetch_or( block_ptr, mask ) & mask );
}
return false;
}
/// set i'th bit to 0
/// can only be called from the device
KOKKOS_FORCEINLINE_FUNCTION
bool reset( unsigned i ) const
{
if ( i < m_size ) {
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return atomic_fetch_and( block_ptr, ~mask ) & mask;
}
return false;
}
/// return true if the i'th bit set to 1
/// can only be called from the device
KOKKOS_FORCEINLINE_FUNCTION
bool test( unsigned i ) const
{
if ( i < m_size ) {
const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]);
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return block & mask;
}
return false;
}
/// used with find_any_set_near or find_any_unset_near functions
/// returns the max number of times those functions should be call
/// when searching for an available bit
KOKKOS_FORCEINLINE_FUNCTION
unsigned max_hint() const
{
return m_blocks.dimension_0();
}
/// find a bit set to 1 near the hint
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
/// and if result.first is false the result.second is a new hint
KOKKOS_INLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
{
const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
const unsigned offset = hint & block_mask;
unsigned block = volatile_load(&m_blocks[ block_idx ]);
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
return find_any_helper(block_idx, offset, block, scan_direction);
}
/// find a bit set to 0 near the hint
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
/// and if result.first is false the result.second is a new hint
KOKKOS_INLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
{
const unsigned block_idx = hint >> block_shift;
const unsigned offset = hint & block_mask;
unsigned block = volatile_load(&m_blocks[ block_idx ]);
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
return find_any_helper(block_idx, offset, block, scan_direction);
}
private:
KOKKOS_FORCEINLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const
{
Kokkos::pair<bool, unsigned> result( block > 0u, 0);
if (!result.first) {
result.second = update_hint( block_idx, offset, scan_direction );
}
else {
result.second = scan_block( (block_idx << block_shift)
, offset
, block
, scan_direction
);
}
return result;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const
{
offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask;
block = Impl::rotate_right(block, offset);
return ((( !(scan_direction & BIT_SCAN_REVERSE) ?
Impl::bit_scan_forward(block) :
Impl::bit_scan_reverse(block)
) + offset
) & block_mask
) + block_start;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
{
block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;
return static_cast<unsigned>(block_idx)*block_size + offset;
}
private:
unsigned m_size;
unsigned m_last_block_mask;
View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
private:
template <typename DDevice>
friend class Bitset;
template <typename DDevice>
friend class ConstBitset;
template <typename Bitset>
friend struct Impl::BitsetCount;
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
/// a thread-safe view to a const bitset
/// i.e. can only test bits
template <typename Device>
class ConstBitset
{
public:
typedef Device execution_space;
typedef unsigned size_type;
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
enum { block_mask = block_size -1u };
enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };
public:
ConstBitset()
: m_size (0)
{}
ConstBitset(Bitset<Device> const& rhs)
: m_size(rhs.m_size)
, m_blocks(rhs.m_blocks)
{}
ConstBitset(ConstBitset<Device> const& rhs)
: m_size( rhs.m_size )
, m_blocks( rhs.m_blocks )
{}
ConstBitset<Device> & operator = (Bitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned size() const
{
return m_size;
}
unsigned count() const
{
Impl::BitsetCount< ConstBitset<Device> > f(*this);
return f.apply();
}
KOKKOS_FORCEINLINE_FUNCTION
bool test( unsigned i ) const
{
if ( i < m_size ) {
const unsigned block = m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return block & mask;
}
return false;
}
private:
unsigned m_size;
View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
private:
template <typename DDevice>
friend class ConstBitset;
template <typename Bitset>
friend struct Impl::BitsetCount;
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
{
if (dst.size() != src.size()) {
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
}
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
if (dst.size() != src.size()) {
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
}
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
if (dst.size() != src.size()) {
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
}
} // namespace Kokkos
#endif //KOKKOS_BITSET_HPP

View File

@ -1,880 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_DualView.hpp
/// \brief Declaration and definition of Kokkos::DualView.
///
/// This header file declares and defines Kokkos::DualView and its
/// related nonmember functions.
#ifndef KOKKOS_DUALVIEW_HPP
#define KOKKOS_DUALVIEW_HPP
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
namespace Kokkos {
/* \class DualView
* \brief Container to manage mirroring a Kokkos::View that lives
* in device memory with a Kokkos::View that lives in host memory.
*
* This class provides capabilities to manage data which exists in two
* memory spaces at the same time. It keeps views of the same layout
* on two memory spaces as well as modified flags for both
* allocations. Users are responsible for setting the modified flags
* manually if they change the data in either memory space, by calling
* the sync() method templated on the device where they modified the
* data. Users may synchronize data by calling the modify() function,
* templated on the device towards which they want to synchronize
* (i.e., the target of the one-way copy operation).
*
* The DualView class also provides convenience methods such as
* realloc, resize and capacity which call the appropriate methods of
* the underlying Kokkos::View objects.
*
* The four template arguments are the same as those of Kokkos::View.
* (Please refer to that class' documentation for a detailed
* description.)
*
* \tparam DataType The type of the entries stored in the container.
*
* \tparam Layout The array's layout in memory.
*
* \tparam Device The Kokkos Device type. If its memory space is
* not the same as the host's memory space, then DualView will
* contain two separate Views: one in device memory, and one in
* host memory. Otherwise, DualView will only store one View.
*
* \tparam MemoryTraits (optional) The user's intended memory access
* behavior. Please see the documentation of Kokkos::View for
* examples. The default suffices for most users.
*/
template< class DataType ,
class Arg1Type = void ,
class Arg2Type = void ,
class Arg3Type = void>
class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
//! The Kokkos Host Device type;
typedef typename traits::host_mirror_space host_mirror_space ;
//! The type of a Kokkos::View on the device.
typedef View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > t_dev ;
/// \typedef t_host
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
typedef typename t_dev::HostMirror t_host ;
//! The type of a const View on the device.
//! The type of a Kokkos::View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > t_dev_const ;
/// \typedef t_host_const
/// \brief The type of a const View host mirror of \c t_dev_const.
typedef typename t_dev_const::HostMirror t_host_const;
//! The type of a const, random-access View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryRandomAccess > t_dev_const_randomread ;
/// \typedef t_host_const_randomread
/// \brief The type of a const, random-access View host mirror of
/// \c t_dev_const_randomread.
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
//! The type of an unmanaged View on the device.
typedef View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_um;
//! The type of an unmanaged View host mirror of \c t_dev_um.
typedef View< typename t_host::data_type ,
typename t_host::array_layout ,
typename t_host::device_type ,
MemoryUnmanaged> t_host_um;
//! The type of a const unmanaged View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_const_um;
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
typedef View<typename t_host::const_data_type,
typename t_host::array_layout,
typename t_host::device_type,
MemoryUnmanaged> t_host_const_um;
//@}
//! \name The two View instances.
//@{
t_dev d_view;
t_host h_view;
//@}
//! \name Counters to keep track of changes ("modified" flags)
//@{
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
//@}
//! \name Constructors
//@{
/// \brief Empty constructor.
///
/// Both device and host View objects are constructed using their
/// default constructors. The "modified" flags are both initialized
/// to "unmodified."
DualView () :
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{}
/// \brief Constructor that allocates View objects on both host and device.
///
/// This constructor works like the analogous constructor of View.
/// The first argument is a string label, which is entirely for your
/// benefit. (Different DualView objects may have the same label if
/// you like.) The arguments that follow are the dimensions of the
/// View objects. For example, if the View has three dimensions,
/// the first three integer arguments will be nonzero, and you may
/// omit the integer arguments that follow.
DualView (const std::string& label,
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
const size_t n6 = 0,
const size_t n7 = 0)
: d_view (label, n0, n1, n2, n3, n4, n5, n6, n7)
, h_view (create_mirror_view (d_view)) // without UVM, host View mirrors
, modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device"))
, modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{}
//! Copy constructor (shallow copy)
template<class SS, class LS, class DS, class MS>
DualView (const DualView<SS,LS,DS,MS>& src) :
d_view (src.d_view),
h_view (src.h_view),
modified_device (src.modified_device),
modified_host (src.modified_host)
{}
/// \brief Create DualView from existing device and host View objects.
///
/// This constructor assumes that the device and host View objects
/// are synchronized. You, the caller, are responsible for making
/// sure this is the case before calling this constructor. After
/// this constructor returns, you may use DualView's sync() and
/// modify() methods to ensure synchronization of the View objects.
///
/// \param d_view_ Device View
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
DualView (const t_dev& d_view_, const t_host& h_view_) :
d_view (d_view_),
h_view (h_view_),
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{
Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
}
//@}
//! \name Methods for synchronizing, marking as modified, and getting Views.
//@{
/// \brief Return a View on a specific device \c Device.
///
/// Please don't be afraid of the if_c expression in the return
/// value's type. That just tells the method what the return type
/// should be: t_dev if the \c Device template parameter matches
/// this DualView's device type, else t_host.
///
/// For example, suppose you create a DualView on Cuda, like this:
/// \code
/// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type;
/// dual_view_type DV ("my dual view", 100);
/// \endcode
/// If you want to get the CUDA device View, do this:
/// \code
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
/// \endcode
/// and if you want to get the host mirror of that View, do this:
/// \code
/// typedef typename Kokkos::HostSpace::execution_space host_device_type;
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
/// \endcode
template< class Device >
KOKKOS_INLINE_FUNCTION
const typename Impl::if_c<
Impl::is_same<typename t_dev::memory_space,
typename Device::memory_space>::value,
t_dev,
t_host>::type& view () const
{
return Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
t_dev,
t_host >::select (d_view , h_view);
}
/// \brief Update data on device or host only if data in the other
/// space has been marked as modified.
///
/// If \c Device is the same as this DualView's device type, then
/// copy data from host to device. Otherwise, copy data from device
/// to host. In either case, only copy if the source of the copy
/// has been modified.
///
/// This is a one-way synchronization only. If the target of the
/// copy has been modified, this operation will discard those
/// modifications. It will also reset both device and host modified
/// flags.
///
/// \note This method doesn't know on its own whether you modified
/// the data in either View. You must manually mark modified data
/// as modified, by calling the modify() method with the
/// appropriate template parameter.
template<class Device>
void sync( const typename Impl::enable_if<
( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
( Impl::is_same< Device , int>::value)
, int >::type& = 0)
{
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value ,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
deep_copy (d_view, h_view);
modified_host() = modified_device() = 0;
}
} else { // hopefully Device is the same as DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
deep_copy (h_view, d_view);
modified_host() = modified_device() = 0;
}
}
if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
t_dev::execution_space::fence();
t_host::execution_space::fence();
}
}
template<class Device>
void sync ( const typename Impl::enable_if<
( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
( Impl::is_same< Device , int>::value)
, int >::type& = 0 )
{
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
}
} else { // hopefully Device is the same as DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
}
}
}
template<class Device>
bool need_sync()
{
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value ,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
return true;
}
} else { // hopefully Device is the same as DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
return true;
}
}
return false;
}
/// \brief Mark data as modified on the given device \c Device.
///
/// If \c Device is the same as this DualView's device type, then
/// mark the device's data as modified. Otherwise, mark the host's
/// data as modified.
template<class Device>
void modify () {
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
// Increment the device's modified count.
modified_device () = (modified_device () > modified_host () ?
modified_device () : modified_host ()) + 1;
} else { // hopefully Device is the same as DualView's host type
// Increment the host's modified count.
modified_host () = (modified_device () > modified_host () ?
modified_device () : modified_host ()) + 1;
}
}
//@}
//! \name Methods for reallocating or resizing the View objects.
//@{
/// \brief Reallocate both View objects.
///
/// This discards any existing contents of the objects, and resets
/// their modified flags. It does <i>not</i> copy the old contents
/// of either View into the new View objects.
void realloc( const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ) {
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
h_view = create_mirror_view( d_view );
/* Reset dirty flags */
modified_device() = modified_host() = 0;
}
/// \brief Resize both views, copying old contents into new if necessary.
///
/// This method only copies the old contents into the new View
/// objects for the device which was last marked as modified.
void resize( const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ) {
if(modified_device() >= modified_host()) {
/* Resize on Device */
::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
h_view = create_mirror_view( d_view );
/* Mark Device copy as modified */
modified_device() = modified_device()+1;
} else {
/* Realloc on Device */
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
t_host temp_view = create_mirror_view( d_view );
/* Remap on Host */
Kokkos::deep_copy( temp_view , h_view );
h_view = temp_view;
/* Mark Host copy as modified */
modified_host() = modified_host()+1;
}
}
//@}
//! \name Methods for getting capacity, stride, or dimension(s).
//@{
//! The allocation size (same as Kokkos::View::capacity).
size_t capacity() const {
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
return d_view.span();
#else
return d_view.capacity();
#endif
}
//! Get stride(s) for each dimension.
template< typename iType>
void stride(iType* stride_) const {
d_view.stride(stride_);
}
/* \brief return size of dimension 0 */
size_t dimension_0() const {return d_view.dimension_0();}
/* \brief return size of dimension 1 */
size_t dimension_1() const {return d_view.dimension_1();}
/* \brief return size of dimension 2 */
size_t dimension_2() const {return d_view.dimension_2();}
/* \brief return size of dimension 3 */
size_t dimension_3() const {return d_view.dimension_3();}
/* \brief return size of dimension 4 */
size_t dimension_4() const {return d_view.dimension_4();}
/* \brief return size of dimension 5 */
size_t dimension_5() const {return d_view.dimension_5();}
/* \brief return size of dimension 6 */
size_t dimension_6() const {return d_view.dimension_6();}
/* \brief return size of dimension 7 */
size_t dimension_7() const {return d_view.dimension_7();}
//@}
};
} // namespace Kokkos
//
// Partial specializations of Kokkos::subview() for DualView objects.
//
namespace Kokkos {
namespace Impl {
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
// The source view rank must be equal to the input argument rank
// Once a void argument is encountered all subsequent arguments must be void.
enum { InputRank =
Impl::StaticAssert<( SrcViewType::rank ==
( V0 ? 0 : (
V1 ? 1 : (
V2 ? 2 : (
V3 ? 3 : (
V4 ? 4 : (
V5 ? 5 : (
V6 ? 6 : (
V7 ? 7 : 8 ))))))) ))
&&
( SrcViewType::rank ==
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
>::value ? SrcViewType::rank : 0 };
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Reverse
enum { R0_rev = 0 == InputRank ? 0u : (
1 == InputRank ? unsigned(R0) : (
2 == InputRank ? unsigned(R1) : (
3 == InputRank ? unsigned(R2) : (
4 == InputRank ? unsigned(R3) : (
5 == InputRank ? unsigned(R4) : (
6 == InputRank ? unsigned(R5) : (
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
typedef typename SrcViewType::array_layout SrcViewLayout ;
// Choose array layout, attempting to preserve original layout if at all possible.
typedef typename Impl::if_c<
( // Same Layout IF
// OutputRank 0
( OutputRank == 0 )
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
// Choose data type as a purely dynamic rank array to accomodate a runtime range.
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
typename SrcViewType::value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
// Choose space.
// If the source view's template arg1 or arg2 is a space then use it,
// otherwise use the source view's execution space.
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
>::type >::type OutputSpace ;
public:
// If keeping the layout then match non-data type arguments
// else keep execution space and memory traits.
typedef typename
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
, typename SrcViewType::memory_traits >
>::type type ;
};
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , void , void , void
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , void , void , void
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0);
sub_view.h_view = subview(src.h_view,arg0);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , void , void
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , void , void
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1);
sub_view.h_view = subview(src.h_view,arg0,arg1);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , void
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , void
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, void , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, void , void , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , void , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , void , void ,void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , void , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , void , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 , class ArgType6 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , void
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 ,
const ArgType6 & arg6 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , void
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
template< class D , class A1 , class A2 , class A3 ,
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , ArgType7
>::type
subview( const DualView<D,A1,A2,A3> & src ,
const ArgType0 & arg0 ,
const ArgType1 & arg1 ,
const ArgType2 & arg2 ,
const ArgType3 & arg3 ,
const ArgType4 & arg4 ,
const ArgType5 & arg5 ,
const ArgType6 & arg6 ,
const ArgType7 & arg7 )
{
typedef typename
Impl::ViewSubview< DualView<D,A1,A2,A3>
, ArgType0 , ArgType1 , ArgType2 , ArgType3
, ArgType4 , ArgType5 , ArgType6 , ArgType7
>::type
DstViewType ;
DstViewType sub_view;
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
sub_view.modified_device = src.modified_device;
sub_view.modified_host = src.modified_host;
return sub_view;
}
//
// Partial specialization of Kokkos::deep_copy() for DualView objects.
//
template< class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
void
deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
const DualView<ST,SL,SD,SM>& src )
{
if (src.modified_device () >= src.modified_host ()) {
deep_copy (dst.d_view, src.d_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
} else {
deep_copy (dst.h_view, src.h_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
}
}
template< class ExecutionSpace ,
class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
void
deep_copy (const ExecutionSpace& exec ,
DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
const DualView<ST,SL,SD,SM>& src )
{
if (src.modified_device () >= src.modified_host ()) {
deep_copy (exec, dst.d_view, src.d_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
} else {
deep_copy (exec, dst.h_view, src.h_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
}
}
} // namespace Kokkos
#endif

View File

@ -1,173 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_FUNCTIONAL_HPP
#define KOKKOS_FUNCTIONAL_HPP
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_Functional_impl.hpp>
namespace Kokkos {
// These should work for most types
template <typename T>
struct pod_hash
{
typedef T argument_type;
typedef T first_argument_type;
typedef uint32_t second_argument_type;
typedef uint32_t result_type;
KOKKOS_FORCEINLINE_FUNCTION
uint32_t operator()(T const & t) const
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), 0); }
KOKKOS_FORCEINLINE_FUNCTION
uint32_t operator()(T const & t, uint32_t seed) const
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), seed); }
};
template <typename T>
struct pod_equal_to
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return Impl::bitwise_equal(&a,&b); }
};
template <typename T>
struct pod_not_equal_to
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return !Impl::bitwise_equal(&a,&b); }
};
template <typename T>
struct equal_to
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return a == b; }
};
template <typename T>
struct not_equal_to
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return a != b; }
};
template <typename T>
struct greater
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return a > b; }
};
template <typename T>
struct less
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return a < b; }
};
template <typename T>
struct greater_equal
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return a >= b; }
};
template <typename T>
struct less_equal
{
typedef T first_argument_type;
typedef T second_argument_type;
typedef bool result_type;
KOKKOS_FORCEINLINE_FUNCTION
bool operator()(T const & a, T const & b) const
{ return a <= b; }
};
} // namespace Kokkos
#endif //KOKKOS_FUNCTIONAL_HPP

View File

@ -1,531 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
#define KOKKOS_SEGMENTED_VIEW_HPP_
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <cstdio>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
namespace Experimental {
namespace Impl {
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
struct delete_segmented_view;
template<class MemorySpace>
inline
void DeviceSetAllocatableMemorySize(size_t) {}
#if defined( KOKKOS_HAVE_CUDA )
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
#ifdef __CUDACC__
size_t size_limit;
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
if(size_limit<size)
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
#endif
}
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
#ifdef __CUDACC__
size_t size_limit;
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
if(size_limit<size)
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
#endif
}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
}
template< class DataType ,
class Arg1Type = void ,
class Arg2Type = void ,
class Arg3Type = void>
class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
//! The type of a Kokkos::View on the device.
typedef Kokkos::View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
Kokkos::MemoryUnmanaged > t_dev ;
private:
Kokkos::View<t_dev*,typename traits::memory_space> segments_;
Kokkos::View<int,typename traits::memory_space> realloc_lock;
Kokkos::View<int,typename traits::memory_space> nsegments_;
size_t segment_length_;
size_t segment_length_m1_;
int max_segments_;
int segment_length_log2;
// Dimensions, cardinality, capacity, and offset computation for
// multidimensional array view of contiguous memory.
// Inherits from Impl::Shape
typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
, typename traits::array_layout
> offset_map_type ;
offset_map_type m_offset_map ;
typedef Kokkos::View< typename traits::array_intrinsic_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > array_type ;
typedef Kokkos::View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > const_type ;
typedef Kokkos::View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > non_const_type ;
typedef Kokkos::View< typename traits::non_const_data_type ,
typename traits::array_layout ,
HostSpace ,
void > HostMirror ;
template< bool Accessible >
KOKKOS_INLINE_FUNCTION
typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
dimension_0_intern() const { return nsegments_() * segment_length_ ; }
template< bool Accessible >
KOKKOS_INLINE_FUNCTION
typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
dimension_0_intern() const
{
// In Host space
int n = 0 ;
#if ! defined( __CUDA_ARCH__ )
Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) );
#endif
return n * segment_length_ ;
}
public:
enum { Rank = traits::rank };
KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; }
/* \brief return (current) size of dimension 0 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
int n = SegmentedView::dimension_0_intern< Accessible >();
return n ;
}
/* \brief return size of dimension 1 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
/* \brief return size of dimension 2 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
/* \brief return size of dimension 3 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
/* \brief return size of dimension 4 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
/* \brief return size of dimension 5 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
/* \brief return size of dimension 6 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
/* \brief return size of dimension 7 */
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
/* \brief return size of dimension 2 */
KOKKOS_INLINE_FUNCTION typename traits::size_type size() const {
return dimension_0() *
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension( const iType & i ) const {
if(i==0)
return dimension_0();
else
return Kokkos::Impl::dimension( m_offset_map , i );
}
KOKKOS_INLINE_FUNCTION
typename traits::size_type capacity() {
return segments_.dimension_0() *
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
}
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_num_segments() {
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
int n = SegmentedView::dimension_0_intern< Accessible >();
return n/segment_length_ ;
}
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_max_segments() {
return max_segments_;
}
/// \brief Constructor that allocates View objects with an initial length of 0.
///
/// This constructor works mostly like the analogous constructor of View.
/// The first argument is a string label, which is entirely for your
/// benefit. (Different SegmentedView objects may have the same label if
/// you like.) The second argument 'view_length' is the size of the segments.
/// This number must be a power of two. The third argument n0 is the maximum
/// value for the first dimension of the segmented view. The maximal allocatable
/// number of Segments is thus: (n0+view_length-1)/view_length.
/// The arguments that follow are the other dimensions of the (1-7) of the
/// View objects. For example, for a View with 3 runtime dimensions,
/// the first 4 integer arguments will be nonzero:
/// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView
/// with a maximum of 306 segments of dimension (32768,8,4). The logical size of
/// the segmented view is (n,8,4) with n between 0 and 10000000.
/// You may omit the integer arguments that follow.
template< class LabelType >
SegmentedView(const LabelType & label ,
const size_t view_length ,
const size_t n0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0
): segment_length_(view_length),segment_length_m1_(view_length-1)
{
segment_length_log2 = -1;
size_t l = segment_length_;
while(l>0) {
l>>=1;
segment_length_log2++;
}
l = 1<<segment_length_log2;
if(l!=segment_length_)
Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
max_segments_ = (n0+segment_length_m1_)/segment_length_;
Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
}
KOKKOS_INLINE_FUNCTION
SegmentedView(const SegmentedView& src):
segments_(src.segments_),
realloc_lock (src.realloc_lock),
nsegments_ (src.nsegments_),
segment_length_(src.segment_length_),
segment_length_m1_(src.segment_length_m1_),
max_segments_ (src.max_segments_),
segment_length_log2(src.segment_length_log2),
m_offset_map (src.m_offset_map)
{}
KOKKOS_INLINE_FUNCTION
SegmentedView& operator= (const SegmentedView& src) {
segments_ = src.segments_;
realloc_lock = src.realloc_lock;
nsegments_ = src.nsegments_;
segment_length_= src.segment_length_;
segment_length_m1_= src.segment_length_m1_;
max_segments_ = src.max_segments_;
segment_length_log2= src.segment_length_log2;
m_offset_map = src.m_offset_map;
return *this;
}
~SegmentedView() {
if ( !segments_.tracker().ref_counting()) { return; }
size_t ref_count = segments_.tracker().ref_count();
if(ref_count == 1u) {
Kokkos::fence();
typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews");
Kokkos::deep_copy(h_nviews,nsegments_);
Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
}
}
KOKKOS_INLINE_FUNCTION
t_dev get_segment(const int& i) const {
return segments_[i];
}
template< class MemberType>
KOKKOS_INLINE_FUNCTION
void grow (MemberType& team_member, const size_t& growSize) const {
if (growSize>max_segments_*segment_length_) {
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
return;
}
if(team_member.team_rank()==0) {
bool too_small = growSize > segment_length_ * nsegments_();
if (too_small) {
while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
; // get the lock
too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
if(too_small) {
while(too_small) {
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
segments_(nsegments_()) =
t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
nsegments_()++;
too_small = growSize > segment_length_ * nsegments_();
}
}
realloc_lock() = 0; //release the lock
}
}
team_member.team_barrier();
}
KOKKOS_INLINE_FUNCTION
void grow_non_thread_safe (const size_t& growSize) const {
if (growSize>max_segments_*segment_length_) {
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
return;
}
bool too_small = growSize > segment_length_ * nsegments_();
if(too_small) {
while(too_small) {
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
typename traits::non_const_value_type* const ptr =
new typename traits::non_const_value_type[alloc_size];
segments_(nsegments_()) =
t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
m_offset_map.N6, m_offset_map.N7);
nsegments_()++;
too_small = growSize > segment_length_ * nsegments_();
}
}
}
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
}
template< typename iType0 , typename iType1 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
std::is_integral<iType1>::value &&
traits::rank == 2 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 , const iType1 & i1 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
}
template< typename iType0 , typename iType1 , typename iType2 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
std::is_integral<iType1>::value &&
std::is_integral<iType2>::value &&
traits::rank == 3 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
std::is_integral<iType1>::value &&
std::is_integral<iType2>::value &&
std::is_integral<iType3>::value &&
traits::rank == 4 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
std::is_integral<iType1>::value &&
std::is_integral<iType2>::value &&
std::is_integral<iType3>::value &&
std::is_integral<iType4>::value &&
traits::rank == 5 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
std::is_integral<iType1>::value &&
std::is_integral<iType2>::value &&
std::is_integral<iType3>::value &&
std::is_integral<iType4>::value &&
std::is_integral<iType5>::value &&
traits::rank == 6 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 , typename iType6 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
std::is_integral<iType1>::value &&
std::is_integral<iType2>::value &&
std::is_integral<iType3>::value &&
std::is_integral<iType4>::value &&
std::is_integral<iType5>::value &&
std::is_integral<iType6>::value &&
traits::rank == 7 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
typename iType4 , typename iType5 , typename iType6 , typename iType7 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
std::is_integral<iType1>::value &&
std::is_integral<iType2>::value &&
std::is_integral<iType3>::value &&
std::is_integral<iType4>::value &&
std::is_integral<iType5>::value &&
std::is_integral<iType6>::value &&
std::is_integral<iType7>::value &&
traits::rank == 8 )
, typename traits::value_type &
>::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
{
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
}
};
namespace Impl {
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
struct delete_segmented_view {
typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
typedef typename view_type::execution_space execution_space;
view_type view_;
delete_segmented_view(view_type view):view_(view) {
}
KOKKOS_INLINE_FUNCTION
void operator() (int i) const {
delete [] view_.get_segment(i).ptr_on_device();
}
};
}
}
}
#endif
#endif

View File

@ -1,226 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STATICCRSGRAPH_HPP
#define KOKKOS_STATICCRSGRAPH_HPP
#include <string>
#include <vector>
#include <Kokkos_Core.hpp>
namespace Kokkos {
/// \class StaticCrsGraph
/// \brief Compressed row storage array.
///
/// \tparam DataType The type of stored entries. If a StaticCrsGraph is
/// used as the graph of a sparse matrix, then this is usually an
/// integer type, the type of the column indices in the sparse
/// matrix.
///
/// \tparam Arg1Type The second template parameter, corresponding
/// either to the Device type (if there are no more template
/// parameters) or to the Layout type (if there is at least one more
/// template parameter).
///
/// \tparam Arg2Type The third template parameter, which if provided
/// corresponds to the Device type.
///
/// \tparam SizeType The type of row offsets. Usually the default
/// parameter suffices. However, setting a nondefault value is
/// necessary in some cases, for example, if you want to have a
/// sparse matrices with dimensions (and therefore column indices)
/// that fit in \c int, but want to store more than <tt>INT_MAX</tt>
/// entries in the sparse matrix.
///
/// A row has a range of entries:
/// <ul>
/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li>
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
/// </ul>
template< class DataType,
class Arg1Type,
class Arg2Type = void,
typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type, void >::size_type>
class StaticCrsGraph {
private:
typedef ViewTraits<DataType*, Arg1Type, Arg2Type, void> traits;
public:
typedef DataType data_type;
typedef typename traits::array_layout array_layout;
typedef typename traits::execution_space execution_space;
typedef typename traits::device_type device_type;
typedef SizeType size_type;
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type;
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
typedef View< const size_type* , array_layout, device_type > row_map_type;
typedef View< DataType* , array_layout, device_type > entries_type;
entries_type entries;
row_map_type row_map;
//! Construct an empty view.
StaticCrsGraph () : entries(), row_map() {}
//! Copy constructor (shallow copy).
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
{}
template<class EntriesType, class RowMapType>
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
{}
/** \brief Assign to a view of the rhs array.
* If the old view is the last view
* then allocated memory is deallocated.
*/
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
entries = rhs.entries;
row_map = rhs.row_map;
return *this;
}
/** \brief Destroy this view of the array.
* If the last view then allocated memory is deallocated.
*/
~StaticCrsGraph() {}
KOKKOS_INLINE_FUNCTION
size_type numRows() const {
return (row_map.dimension_0 () != 0) ?
row_map.dimension_0 () - static_cast<size_type> (1) :
static_cast<size_type> (0);
}
};
//----------------------------------------------------------------------------
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< InputSizeType > & input );
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< std::vector< InputSizeType > > & input );
//----------------------------------------------------------------------------
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
typename SizeType >
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
typename SizeType >
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class GraphType >
struct StaticCrsGraphMaximumEntry {
typedef typename GraphType::execution_space execution_space ;
typedef typename GraphType::data_type value_type ;
const typename GraphType::entries_type entries ;
StaticCrsGraphMaximumEntry( const GraphType & graph ) : entries( graph.entries ) {}
KOKKOS_INLINE_FUNCTION
void operator()( const unsigned i , value_type & update ) const
{ if ( update < entries(i) ) update = entries(i); }
KOKKOS_INLINE_FUNCTION
void init( value_type & update ) const
{ update = 0 ; }
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & update ,
volatile const value_type & input ) const
{ if ( update < input ) update = input ; }
};
}
template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
{
typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
DataType result = 0 ;
Kokkos::parallel_reduce( graph.entries.dimension_0(),
FunctorType(graph), result );
return result ;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_CRSARRAY_HPP */

View File

@ -1,848 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_UnorderedMap.hpp
/// \brief Declaration and definition of Kokkos::UnorderedMap.
///
/// This header file declares and defines Kokkos::UnorderedMap and its
/// related nonmember functions.
#ifndef KOKKOS_UNORDERED_MAP_HPP
#define KOKKOS_UNORDERED_MAP_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>
#include <Kokkos_Bitset.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_UnorderedMap_impl.hpp>
#include <iostream>
#include <stdint.h>
#include <stdexcept>
namespace Kokkos {
enum { UnorderedMapInvalidIndex = ~0u };
/// \brief First element of the return value of UnorderedMap::insert().
///
/// Inserting an element into an UnorderedMap is not guaranteed to
/// succeed. There are three possible conditions:
/// <ol>
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
/// means that the UnorderedMap ran out of space. </li>
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
/// did <i>not</i> exist in the table before. </li>
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
/// <i>did</i> exist in the table before. The new value was
/// ignored and the old value was left in place. </li>
/// </ol>
class UnorderedMapInsertResult
{
private:
enum Status{
SUCCESS = 1u << 31
, EXISTING = 1u << 30
, FREED_EXISTING = 1u << 29
, LIST_LENGTH_MASK = ~(SUCCESS | EXISTING | FREED_EXISTING)
};
public:
/// Did the map successful insert the key/value pair
KOKKOS_FORCEINLINE_FUNCTION
bool success() const { return (m_status & SUCCESS); }
/// Was the key already present in the map
KOKKOS_FORCEINLINE_FUNCTION
bool existing() const { return (m_status & EXISTING); }
/// Did the map fail to insert the key due to insufficent capacity
KOKKOS_FORCEINLINE_FUNCTION
bool failed() const { return m_index == UnorderedMapInvalidIndex; }
/// Did the map lose a race condition to insert a dupulicate key/value pair
/// where an index was claimed that needed to be released
KOKKOS_FORCEINLINE_FUNCTION
bool freed_existing() const { return (m_status & FREED_EXISTING); }
/// How many iterations through the insert loop did it take before the
/// map returned
KOKKOS_FORCEINLINE_FUNCTION
uint32_t list_position() const { return (m_status & LIST_LENGTH_MASK); }
/// Index where the key can be found as long as the insert did not fail
KOKKOS_FORCEINLINE_FUNCTION
uint32_t index() const { return m_index; }
KOKKOS_FORCEINLINE_FUNCTION
UnorderedMapInsertResult()
: m_index(UnorderedMapInvalidIndex)
, m_status(0)
{}
KOKKOS_FORCEINLINE_FUNCTION
void increment_list_position()
{
m_status += (list_position() < LIST_LENGTH_MASK) ? 1u : 0u;
}
KOKKOS_FORCEINLINE_FUNCTION
void set_existing(uint32_t i, bool arg_freed_existing)
{
m_index = i;
m_status = EXISTING | (arg_freed_existing ? FREED_EXISTING : 0u) | list_position();
}
KOKKOS_FORCEINLINE_FUNCTION
void set_success(uint32_t i)
{
m_index = i;
m_status = SUCCESS | list_position();
}
private:
uint32_t m_index;
uint32_t m_status;
};
/// \class UnorderedMap
/// \brief Thread-safe, performance-portable lookup table.
///
/// This class provides a lookup table. In terms of functionality,
/// this class compares to std::unordered_map (new in C++11).
/// "Unordered" means that keys are not stored in any particular
/// order, unlike (for example) std::map. "Thread-safe" means that
/// lookups, insertion, and deletion are safe to call by multiple
/// threads in parallel. "Performance-portable" means that parallel
/// performance of these operations is reasonable, on multiple
/// hardware platforms. Platforms on which performance has been
/// tested include conventional Intel x86 multicore processors, Intel
/// Xeon Phi ("MIC"), and NVIDIA GPUs.
///
/// Parallel performance portability entails design decisions that
/// might differ from one's expectation for a sequential interface.
/// This particularly affects insertion of single elements. In an
/// interface intended for sequential use, insertion might reallocate
/// memory if the original allocation did not suffice to hold the new
/// element. In this class, insertion does <i>not</i> reallocate
/// memory. This means that it might fail. insert() returns an enum
/// which indicates whether the insert failed. There are three
/// possible conditions:
/// <ol>
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
/// means that the UnorderedMap ran out of space. </li>
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
/// did <i>not</i> exist in the table before. </li>
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
/// <i>did</i> exist in the table before. The new value was
/// ignored and the old value was left in place. </li>
/// </ol>
///
/// \tparam Key Type of keys of the lookup table. If \c const, users
/// are not allowed to add or remove keys, though they are allowed
/// to change values. In that case, the implementation may make
/// optimizations specific to the <tt>Device</tt>. For example, if
/// <tt>Device</tt> is \c Cuda, it may use texture fetches to access
/// keys.
///
/// \tparam Value Type of values stored in the lookup table. You may use
/// \c void here, in which case the table will be a set of keys. If
/// \c const, users are not allowed to change entries.
/// In that case, the implementation may make
/// optimizations specific to the \c Device, such as using texture
/// fetches to access values.
///
/// \tparam Device The Kokkos Device type.
///
/// \tparam Hasher Definition of the hash function for instances of
/// <tt>Key</tt>. The default will calculate a bitwise hash.
///
/// \tparam EqualTo Definition of the equality function for instances of
/// <tt>Key</tt>. The default will do a bitwise equality comparison.
///
template < typename Key
, typename Value
, typename Device = Kokkos::DefaultExecutionSpace
, typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>
, typename EqualTo = pod_equal_to<typename Impl::remove_const<Key>::type>
>
class UnorderedMap
{
private:
typedef typename ViewTraits<Key,Device,void,void>::host_mirror_space host_mirror_space ;
public:
//! \name Public types and constants
//@{
//key_types
typedef Key declared_key_type;
typedef typename Impl::remove_const<declared_key_type>::type key_type;
typedef typename Impl::add_const<key_type>::type const_key_type;
//value_types
typedef Value declared_value_type;
typedef typename Impl::remove_const<declared_value_type>::type value_type;
typedef typename Impl::add_const<value_type>::type const_value_type;
typedef Device execution_space;
typedef Hasher hasher_type;
typedef EqualTo equal_to_type;
typedef uint32_t size_type;
//map_types
typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type;
typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type;
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
static const bool is_set = Impl::is_same<void,value_type>::value;
static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
static const bool is_modifiable_map = has_const_key && !has_const_value;
static const bool is_const_map = has_const_key && has_const_value;
typedef UnorderedMapInsertResult insert_result;
typedef UnorderedMap<Key,Value,host_mirror_space,Hasher,EqualTo> HostMirror;
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
//@}
private:
enum { invalid_index = ~static_cast<size_type>(0) };
typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type;
typedef typename Impl::if_c< is_insertable_map
, View< key_type *, execution_space>
, View< const key_type *, execution_space, MemoryTraits<RandomAccess> >
>::type key_type_view;
typedef typename Impl::if_c< is_insertable_map || is_modifiable_map
, View< impl_value_type *, execution_space>
, View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> >
>::type value_type_view;
typedef typename Impl::if_c< is_insertable_map
, View< size_type *, execution_space>
, View< const size_type *, execution_space, MemoryTraits<RandomAccess> >
>::type size_type_view;
typedef typename Impl::if_c< is_insertable_map
, Bitset< execution_space >
, ConstBitset< execution_space>
>::type bitset_type;
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
enum { num_scalars = 3 };
typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view;
public:
//! \name Public member functions
//@{
UnorderedMap()
: m_bounded_insert()
, m_hasher()
, m_equal_to()
, m_size()
, m_available_indexes()
, m_hash_lists()
, m_next_index()
, m_keys()
, m_values()
, m_scalars()
{}
/// \brief Constructor
///
/// \param capacity_hint [in] Initial guess of how many unique keys will be inserted into the map
/// \param hash [in] Hasher function for \c Key instances. The
/// default value usually suffices.
UnorderedMap( size_type capacity_hint, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type() )
: m_bounded_insert(true)
, m_hasher(hasher)
, m_equal_to(equal_to)
, m_size()
, m_available_indexes(calculate_capacity(capacity_hint))
, m_hash_lists(ViewAllocateWithoutInitializing("UnorderedMap hash list"), Impl::find_hash_size(capacity()))
, m_next_index(ViewAllocateWithoutInitializing("UnorderedMap next index"), capacity()+1) // +1 so that the *_at functions can always return a valid reference
, m_keys("UnorderedMap keys",capacity()+1)
, m_values("UnorderedMap values",(is_set? 1 : capacity()+1))
, m_scalars("UnorderedMap scalars")
{
if (!is_insertable_map) {
throw std::runtime_error("Cannot construct a non-insertable (i.e. const key_type) unordered_map");
}
Kokkos::deep_copy(m_hash_lists, invalid_index);
Kokkos::deep_copy(m_next_index, invalid_index);
}
void reset_failed_insert_flag()
{
reset_flag(failed_insert_idx);
}
histogram_type get_histogram()
{
return histogram_type(*this);
}
//! Clear all entries in the table.
void clear()
{
m_bounded_insert = true;
if (capacity() == 0) return;
m_available_indexes.clear();
Kokkos::deep_copy(m_hash_lists, invalid_index);
Kokkos::deep_copy(m_next_index, invalid_index);
{
const key_type tmp = key_type();
Kokkos::deep_copy(m_keys,tmp);
}
if (is_set){
const impl_value_type tmp = impl_value_type();
Kokkos::deep_copy(m_values,tmp);
}
{
Kokkos::deep_copy(m_scalars, 0);
}
}
/// \brief Change the capacity of the the map
///
/// If there are no failed inserts the current size of the map will
/// be used as a lower bound for the input capacity.
/// If the map is not empty and does not have failed inserts
/// and the capacity changes then the current data is copied
/// into the resized / rehashed map.
///
/// This is <i>not</i> a device function; it may <i>not</i> be
/// called in a parallel kernel.
bool rehash(size_type requested_capacity = 0)
{
const bool bounded_insert = (capacity() == 0) || (size() == 0u);
return rehash(requested_capacity, bounded_insert );
}
bool rehash(size_type requested_capacity, bool bounded_insert)
{
if(!is_insertable_map) return false;
const size_type curr_size = size();
requested_capacity = (requested_capacity < curr_size) ? curr_size : requested_capacity;
insertable_map_type tmp(requested_capacity, m_hasher, m_equal_to);
if (curr_size) {
tmp.m_bounded_insert = false;
Impl::UnorderedMapRehash<insertable_map_type> f(tmp,*this);
f.apply();
}
tmp.m_bounded_insert = bounded_insert;
*this = tmp;
return true;
}
/// \brief The number of entries in the table.
///
/// This method has undefined behavior when erasable() is true.
///
/// Note that this is not a device function; it cannot be called in
/// a parallel kernel. The value is not stored as a variable; it
/// must be computed.
size_type size() const
{
if( capacity() == 0u ) return 0u;
if (modified()) {
m_size = m_available_indexes.count();
reset_flag(modified_idx);
}
return m_size;
}
/// \brief The current number of failed insert() calls.
///
/// This is <i>not</i> a device function; it may <i>not</i> be
/// called in a parallel kernel. The value is not stored as a
/// variable; it must be computed.
bool failed_insert() const
{
return get_flag(failed_insert_idx);
}
bool erasable() const
{
return is_insertable_map ? get_flag(erasable_idx) : false;
}
bool begin_erase()
{
bool result = !erasable();
if (is_insertable_map && result) {
execution_space::fence();
set_flag(erasable_idx);
execution_space::fence();
}
return result;
}
bool end_erase()
{
bool result = erasable();
if (is_insertable_map && result) {
execution_space::fence();
Impl::UnorderedMapErase<declared_map_type> f(*this);
f.apply();
execution_space::fence();
reset_flag(erasable_idx);
}
return result;
}
/// \brief The maximum number of entries that the table can hold.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_FORCEINLINE_FUNCTION
size_type capacity() const
{ return m_available_indexes.size(); }
/// \brief The number of hash table "buckets."
///
/// This is different than the number of entries that the table can
/// hold. Each key hashes to an index in [0, hash_capacity() - 1].
/// That index can hold zero or more entries. This class decides
/// what hash_capacity() should be, given the user's upper bound on
/// the number of entries the table must be able to hold.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
size_type hash_capacity() const
{ return m_hash_lists.dimension_0(); }
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel. As discussed in the class documentation, it need not
/// succeed. The return value tells you if it did.
///
/// \param k [in] The key to attempt to insert.
/// \param v [in] The corresponding value to attempt to insert. If
/// using this class as a set (with Value = void), then you need not
/// provide this value.
KOKKOS_INLINE_FUNCTION
insert_result insert(key_type const& k, impl_value_type const&v = impl_value_type()) const
{
insert_result result;
if ( !is_insertable_map || capacity() == 0u || m_scalars((int)erasable_idx) ) {
return result;
}
if ( !m_scalars((int)modified_idx) ) {
m_scalars((int)modified_idx) = true;
}
int volatile & failed_insert_ref = m_scalars((int)failed_insert_idx) ;
const size_type hash_value = m_hasher(k);
const size_type hash_list = hash_value % m_hash_lists.dimension_0();
size_type * curr_ptr = & m_hash_lists[ hash_list ];
size_type new_index = invalid_index ;
// Force integer multiply to long
size_type index_hint = static_cast<size_type>( (static_cast<double>(hash_list) * capacity()) / m_hash_lists.dimension_0());
size_type find_attempts = 0;
enum { bounded_find_attempts = 32u };
const size_type max_attempts = (m_bounded_insert && (bounded_find_attempts < m_available_indexes.max_hint()) ) ?
bounded_find_attempts :
m_available_indexes.max_hint();
bool not_done = true ;
#if defined( __MIC__ )
#pragma noprefetch
#endif
while ( not_done ) {
// Continue searching the unordered list for this key,
// list will only be appended during insert phase.
// Need volatile_load as other threads may be appending.
size_type curr = volatile_load(curr_ptr);
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
#if defined( __MIC__ )
#pragma noprefetch
#endif
while ( curr != invalid_index && ! m_equal_to( volatile_load(&m_keys[curr]), k) ) {
result.increment_list_position();
index_hint = curr;
curr_ptr = &m_next_index[curr];
curr = volatile_load(curr_ptr);
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
}
//------------------------------------------------------------
// If key already present then return that index.
if ( curr != invalid_index ) {
const bool free_existing = new_index != invalid_index;
if ( free_existing ) {
// Previously claimed an unused entry that was not inserted.
// Release this unused entry immediately.
if (!m_available_indexes.reset(new_index) ) {
printf("Unable to free existing\n");
}
}
result.set_existing(curr, free_existing);
not_done = false ;
}
//------------------------------------------------------------
// Key is not currently in the map.
// If the thread has claimed an entry try to insert now.
else {
//------------------------------------------------------------
// If have not already claimed an unused entry then do so now.
if (new_index == invalid_index) {
bool found = false;
// use the hash_list as the flag for the search direction
Kokkos::tie(found, index_hint) = m_available_indexes.find_any_unset_near( index_hint, hash_list );
// found and index and this thread set it
if ( !found && ++find_attempts >= max_attempts ) {
failed_insert_ref = true;
not_done = false ;
}
else if (m_available_indexes.set(index_hint) ) {
new_index = index_hint;
// Set key and value
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]);
m_keys[new_index] = k ;
if (!is_set) {
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]);
m_values[new_index] = v ;
}
// Do not proceed until key and value are updated in global memory
memory_fence();
}
}
else if (failed_insert_ref) {
not_done = false;
}
// Attempt to append claimed entry into the list.
// Another thread may also be trying to append the same list so protect with atomic.
if ( new_index != invalid_index &&
curr == atomic_compare_exchange(curr_ptr, static_cast<size_type>(invalid_index), new_index) ) {
// Succeeded in appending
result.set_success(new_index);
not_done = false ;
}
}
} // while ( not_done )
return result ;
}
KOKKOS_INLINE_FUNCTION
bool erase(key_type const& k) const
{
bool result = false;
if(is_insertable_map && 0u < capacity() && m_scalars((int)erasable_idx)) {
if ( ! m_scalars((int)modified_idx) ) {
m_scalars((int)modified_idx) = true;
}
size_type index = find(k);
if (valid_at(index)) {
m_available_indexes.reset(index);
result = true;
}
}
return result;
}
/// \brief Find the given key \c k, if it exists in the table.
///
/// \return If the key exists in the table, the index of the
/// value corresponding to that key; otherwise, an invalid index.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
size_type find( const key_type & k) const
{
size_type curr = 0u < capacity() ? m_hash_lists( m_hasher(k) % m_hash_lists.dimension_0() ) : invalid_index ;
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
while (curr != invalid_index && !m_equal_to( m_keys[curr], k) ) {
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
curr = m_next_index[curr];
}
return curr;
}
/// \brief Does the key exist in the map
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
bool exists( const key_type & k) const
{
return valid_at(find(k));
}
/// \brief Get the value with \c i as its direct index.
///
/// \param i [in] Index directly into the array of entries.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
///
/// 'const value_type' via Cuda texture fetch must return by value.
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::if_c< (is_set || has_const_value), impl_value_type, impl_value_type &>::type
value_at(size_type i) const
{
return m_values[ is_set ? 0 : (i < capacity() ? i : capacity()) ];
}
/// \brief Get the key with \c i as its direct index.
///
/// \param i [in] Index directly into the array of entries.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_FORCEINLINE_FUNCTION
key_type key_at(size_type i) const
{
return m_keys[ i < capacity() ? i : capacity() ];
}
KOKKOS_FORCEINLINE_FUNCTION
bool valid_at(size_type i) const
{
return m_available_indexes.test(i);
}
template <typename SKey, typename SValue>
UnorderedMap( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src,
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value,int>::type = 0
)
: m_bounded_insert(src.m_bounded_insert)
, m_hasher(src.m_hasher)
, m_equal_to(src.m_equal_to)
, m_size(src.m_size)
, m_available_indexes(src.m_available_indexes)
, m_hash_lists(src.m_hash_lists)
, m_next_index(src.m_next_index)
, m_keys(src.m_keys)
, m_values(src.m_values)
, m_scalars(src.m_scalars)
{}
template <typename SKey, typename SValue>
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value
,declared_map_type & >::type
operator=( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src)
{
m_bounded_insert = src.m_bounded_insert;
m_hasher = src.m_hasher;
m_equal_to = src.m_equal_to;
m_size = src.m_size;
m_available_indexes = src.m_available_indexes;
m_hash_lists = src.m_hash_lists;
m_next_index = src.m_next_index;
m_keys = src.m_keys;
m_values = src.m_values;
m_scalars = src.m_scalars;
return *this;
}
template <typename SKey, typename SValue, typename SDevice>
typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
>::type
create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
{
if (m_hash_lists.ptr_on_device() != src.m_hash_lists.ptr_on_device()) {
insertable_map_type tmp;
tmp.m_bounded_insert = src.m_bounded_insert;
tmp.m_hasher = src.m_hasher;
tmp.m_equal_to = src.m_equal_to;
tmp.m_size = src.size();
tmp.m_available_indexes = bitset_type( src.capacity() );
tmp.m_hash_lists = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap hash list"), src.m_hash_lists.dimension_0() );
tmp.m_next_index = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap next index"), src.m_next_index.dimension_0() );
tmp.m_keys = key_type_view( ViewAllocateWithoutInitializing("UnorderedMap keys"), src.m_keys.dimension_0() );
tmp.m_values = value_type_view( ViewAllocateWithoutInitializing("UnorderedMap values"), src.m_values.dimension_0() );
tmp.m_scalars = scalars_view("UnorderedMap scalars");
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
raw_deep_copy(tmp.m_keys.ptr_on_device(), src.m_keys.ptr_on_device(), sizeof(key_type)*src.m_keys.dimension_0());
if (!is_set) {
raw_deep_copy(tmp.m_values.ptr_on_device(), src.m_values.ptr_on_device(), sizeof(impl_value_type)*src.m_values.dimension_0());
}
raw_deep_copy(tmp.m_scalars.ptr_on_device(), src.m_scalars.ptr_on_device(), sizeof(int)*num_scalars );
*this = tmp;
}
}
//@}
private: // private member functions
bool modified() const
{
return get_flag(modified_idx);
}
void set_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
const int true_ = true;
raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
}
void reset_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
const int false_ = false;
raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
}
bool get_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
int result = false;
raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
return result;
}
static uint32_t calculate_capacity(uint32_t capacity_hint)
{
// increase by 16% and round to nears multiple of 128
return capacity_hint ? ((static_cast<uint32_t>(7ull*capacity_hint/6u) + 127u)/128u)*128u : 128u;
}
private: // private members
bool m_bounded_insert;
hasher_type m_hasher;
equal_to_type m_equal_to;
mutable size_type m_size;
bitset_type m_available_indexes;
size_type_view m_hash_lists;
size_type_view m_next_index;
key_type_view m_keys;
value_type_view m_values;
scalars_view m_scalars;
template <typename KKey, typename VValue, typename DDevice, typename HHash, typename EEqualTo>
friend class UnorderedMap;
template <typename UMap>
friend struct Impl::UnorderedMapErase;
template <typename UMap>
friend struct Impl::UnorderedMapHistogram;
template <typename UMap>
friend struct Impl::UnorderedMapPrint;
};
// Specialization of deep_copy for two UnorderedMap objects.
template < typename DKey, typename DT, typename DDevice
, typename SKey, typename ST, typename SDevice
, typename Hasher, typename EqualTo >
inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> & dst
, const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> & src )
{
dst.create_copy_view(src);
}
} // namespace Kokkos
#endif //KOKKOS_UNORDERED_MAP_HPP

View File

@ -1,287 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VECTOR_HPP
#define KOKKOS_VECTOR_HPP
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_DualView.hpp>
/* Drop in replacement for std::vector based on Kokkos::DualView
* Most functions only work on the host (it will not compile if called from device kernel)
*
*/
namespace Kokkos {
template <typename Scalar, class Space = Kokkos::DefaultExecutionSpace >
class vector : public DualView<Scalar*,LayoutLeft,Space> {
public:
typedef typename Space::memory_space memory_space;
typedef typename Space::execution_space execution_space;
typedef typename Kokkos::Device<execution_space,memory_space> device_type;
typedef Scalar value_type;
typedef Scalar* pointer;
typedef const Scalar* const_pointer;
typedef Scalar* reference;
typedef const Scalar* const_reference;
typedef Scalar* iterator;
typedef const Scalar* const_iterator;
private:
size_t _size;
typedef size_t size_type;
float _extra_storage;
typedef DualView<Scalar*,LayoutLeft,Space> DV;
public:
#ifdef KOKKOS_CUDA_USE_UVM
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
#else
inline Scalar& operator() (int i) const {return DV::h_view(i);};
inline Scalar& operator[] (int i) const {return DV::h_view(i);};
#endif
/* Member functions which behave like std::vector functions */
vector():DV() {
_size = 0;
_extra_storage = 1.1;
DV::modified_host() = 1;
};
vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Space>("Vector",size_t(n*(1.1))) {
_size = n;
_extra_storage = 1.1;
DV::modified_host() = 1;
assign(n,val);
}
void resize(size_t n) {
if(n>=capacity())
DV::resize(size_t (n*_extra_storage));
_size = n;
}
void resize(size_t n, const Scalar& val) {
assign(n,val);
}
void assign (size_t n, const Scalar& val) {
/* Resize if necessary (behavour of std:vector) */
if(n>capacity())
DV::resize(size_t (n*_extra_storage));
_size = n;
/* Assign value either on host or on device */
if( DV::modified_host() >= DV::modified_device() ) {
set_functor_host f(DV::h_view,val);
parallel_for(n,f);
DV::t_host::execution_space::fence();
DV::modified_host()++;
} else {
set_functor f(DV::d_view,val);
parallel_for(n,f);
DV::t_dev::execution_space::fence();
DV::modified_device()++;
}
}
void reserve(size_t n) {
DV::resize(size_t (n*_extra_storage));
}
void push_back(Scalar val) {
DV::modified_host()++;
if(_size == capacity()) {
size_t new_size = _size*_extra_storage;
if(new_size == _size) new_size++;
DV::resize(new_size);
}
DV::h_view(_size) = val;
_size++;
};
void pop_back() {
_size--;
};
void clear() {
_size = 0;
}
size_type size() const {return _size;};
size_type max_size() const {return 2000000000;}
size_type capacity() const {return DV::capacity();};
bool empty() const {return _size==0;};
iterator begin() const {return &DV::h_view(0);};
iterator end() const {return &DV::h_view(_size);};
/* std::algorithms wich work originally with iterators, here they are implemented as member functions */
size_t
lower_bound (const size_t& start,
const size_t& theEnd,
const Scalar& comp_val) const
{
int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion
int upper = _size > theEnd? theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion
if (upper <= lower) {
return theEnd;
}
Scalar lower_val = DV::h_view(lower);
Scalar upper_val = DV::h_view(upper);
size_t idx = (upper+lower)/2;
Scalar val = DV::h_view(idx);
if(val>upper_val) return upper;
if(val<lower_val) return start;
while(upper>lower) {
if(comp_val>val) {
lower = ++idx;
} else {
upper = idx;
}
idx = (upper+lower)/2;
val = DV::h_view(idx);
}
return idx;
}
bool is_sorted() {
for(int i=0;i<_size-1;i++) {
if(DV::h_view(i)>DV::h_view(i+1)) return false;
}
return true;
}
iterator find(Scalar val) const {
if(_size == 0) return end();
int upper,lower,current;
current = _size/2;
upper = _size-1;
lower = 0;
if((val<DV::h_view(0)) || (val>DV::h_view(_size-1)) ) return end();
while(upper>lower)
{
if(val>DV::h_view(current)) lower = current+1;
else upper = current;
current = (upper+lower)/2;
}
if(val==DV::h_view(current)) return &DV::h_view(current);
else return end();
}
/* Additional functions for data management */
void device_to_host(){
deep_copy(DV::h_view,DV::d_view);
}
void host_to_device() const {
deep_copy(DV::d_view,DV::h_view);
}
void on_host() {
DV::modified_host() = DV::modified_device() + 1;
}
void on_device() {
DV::modified_device() = DV::modified_host() + 1;
}
void set_overallocation(float extra) {
_extra_storage = 1.0 + extra;
}
public:
struct set_functor {
typedef typename DV::t_dev::execution_space execution_space;
typename DV::t_dev _data;
Scalar _val;
set_functor(typename DV::t_dev data, Scalar val) :
_data(data),_val(val) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
_data(i) = _val;
}
};
struct set_functor_host {
typedef typename DV::t_host::execution_space execution_space;
typename DV::t_host _data;
Scalar _val;
set_functor_host(typename DV::t_host data, Scalar val) :
_data(data),_val(val) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
_data(i) = _val;
}
};
};
}
#endif

View File

@ -1,173 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITSET_IMPL_HPP
#define KOKKOS_BITSET_IMPL_HPP
#include <Kokkos_Macros.hpp>
#include <stdint.h>
#include <cstdio>
#include <climits>
#include <iostream>
#include <iomanip>
namespace Kokkos { namespace Impl {
KOKKOS_FORCEINLINE_FUNCTION
unsigned rotate_right(unsigned i, int r)
{
enum { size = static_cast<int>(sizeof(unsigned)*CHAR_BIT) };
return r ? ((i >> r) | (i << (size-r))) : i ;
}
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_forward(unsigned i)
{
#if defined( __CUDA_ARCH__ )
return __ffs(i) - 1;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
return __builtin_ffs(i) - 1;
#elif defined( __INTEL_COMPILER )
return _bit_scan_forward(i);
#else
unsigned t = 1u;
int r = 0;
while (i && (i & t == 0))
{
t = t << 1;
++r;
}
return r;
#endif
}
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_reverse(unsigned i)
{
enum { shift = static_cast<int>(sizeof(unsigned)*CHAR_BIT - 1) };
#if defined( __CUDA_ARCH__ )
return shift - __clz(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
return shift - __builtin_clz(i);
#elif defined( __INTEL_COMPILER )
return _bit_scan_reverse(i);
#else
unsigned t = 1u << shift;
int r = 0;
while (i && (i & t == 0))
{
t = t >> 1;
++r;
}
return r;
#endif
}
// count the bits set
KOKKOS_FORCEINLINE_FUNCTION
int popcount(unsigned i)
{
#if defined( __CUDA_ARCH__ )
return __popc(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
return __builtin_popcount(i);
#elif defined ( __INTEL_COMPILER )
return _popcnt32(i);
#else
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive
i = i - ((i >> 1) & ~0u/3u); // temp
i = (i & ~0u/15u*3u) + ((i >> 2) & ~0u/15u*3u); // temp
i = (i + (i >> 4)) & ~0u/255u*15u; // temp
return (int)((i * (~0u/255u)) >> (sizeof(unsigned) - 1) * CHAR_BIT); // count
#endif
}
template <typename Bitset>
struct BitsetCount
{
typedef Bitset bitset_type;
typedef typename bitset_type::execution_space::execution_space execution_space;
typedef typename bitset_type::size_type size_type;
typedef size_type value_type;
bitset_type m_bitset;
BitsetCount( bitset_type const& bitset)
: m_bitset(bitset)
{}
size_type apply() const
{
size_type count = 0u;
parallel_reduce(m_bitset.m_blocks.dimension_0(), *this, count);
return count;
}
KOKKOS_INLINE_FUNCTION
static void init( value_type & count)
{
count = 0u;
}
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & count, const volatile size_type & incr )
{
count += incr;
}
KOKKOS_INLINE_FUNCTION
void operator()( size_type i, value_type & count) const
{
count += popcount(m_bitset.m_blocks[i]);
}
};
}} //Kokkos::Impl
#endif // KOKKOS_BITSET_IMPL_HPP

View File

@ -1,195 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
#define KOKKOS_FUNCTIONAL_IMPL_HPP
#include <Kokkos_Macros.hpp>
#include <stdint.h>
namespace Kokkos { namespace Impl {
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t getblock32 ( const uint8_t * p, int i )
{
// used to avoid aliasing error which could cause errors with
// forced inlining
return ((uint32_t)p[i*4+0])
| ((uint32_t)p[i*4+1] << 8)
| ((uint32_t)p[i*4+2] << 16)
| ((uint32_t)p[i*4+3] << 24);
}
KOKKOS_FORCEINLINE_FUNCTION
uint32_t rotl32 ( uint32_t x, int8_t r )
{ return (x << r) | (x >> (32 - r)); }
KOKKOS_FORCEINLINE_FUNCTION
uint32_t fmix32 ( uint32_t h )
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
KOKKOS_INLINE_FUNCTION
uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 4;
uint32_t h1 = seed;
const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593;
//----------
// body
for(int i=0; i<nblocks; ++i)
{
uint32_t k1 = getblock32(data,i);
k1 *= c1;
k1 = rotl32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = rotl32(h1,13);
h1 = h1*5+0xe6546b64;
}
//----------
// tail
const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
uint32_t k1 = 0;
switch(len & 3)
{
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0];
k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len;
h1 = fmix32(h1);
return h1;
}
#if defined( __GNUC__ ) /* GNU C */ || \
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#endif
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
enum {
NUM_8 = sizeof(T),
NUM_16 = NUM_8 / 2,
NUM_32 = NUM_8 / 4,
NUM_64 = NUM_8 / 8
};
union {
T const * const ptr;
T64 const * const ptr64;
T32 const * const ptr32;
T16 const * const ptr16;
T8 const * const ptr8;
} a = {a_ptr}, b = {b_ptr};
bool result = true;
for (int i=0; i < NUM_64; ++i) {
result = result && a.ptr64[i] == b.ptr64[i];
}
if ( NUM_64*2 < NUM_32 ) {
result = result && a.ptr32[NUM_64*2] == b.ptr32[NUM_64*2];
}
if ( NUM_32*2 < NUM_16 ) {
result = result && a.ptr16[NUM_32*2] == b.ptr16[NUM_32*2];
}
if ( NUM_16*2 < NUM_8 ) {
result = result && a.ptr8[NUM_16*2] == b.ptr8[NUM_16*2];
}
return result;
}
#undef KOKKOS_MAY_ALIAS
}} // namespace Kokkos::Impl
#endif //KOKKOS_FUNCTIONAL_IMPL_HPP

View File

@ -1,208 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
{
return view ;
}
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view )
{
// Force copy:
//typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type ;
typename staticcrsgraph_type::HostMirror tmp ;
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
// Allocation to match:
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
tmp.entries = create_mirror( view.entries );
// Deep copy:
deep_copy( tmp_row_map , view.row_map );
deep_copy( tmp.entries , view.entries );
return tmp ;
}
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
{
return create_mirror( view );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class StaticCrsGraphType , class InputSizeType >
inline
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< InputSizeType > & input )
{
typedef StaticCrsGraphType output_type ;
//typedef std::vector< InputSizeType > input_type ; // unused
typedef typename output_type::entries_type entries_type ;
typedef View< typename output_type::size_type [] ,
typename output_type::array_layout ,
typename output_type::execution_space > work_type ;
output_type output ;
// Create the row map:
const size_t length = input.size();
{
work_type row_work( "tmp" , length + 1 );
typename work_type::HostMirror row_work_host =
create_mirror_view( row_work );
size_t sum = 0 ;
row_work_host[0] = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
row_work_host[i+1] = sum += input[i];
}
deep_copy( row_work , row_work_host );
output.entries = entries_type( label , sum );
output.row_map = row_work ;
}
return output ;
}
//----------------------------------------------------------------------------
template< class StaticCrsGraphType , class InputSizeType >
inline
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< std::vector< InputSizeType > > & input )
{
typedef StaticCrsGraphType output_type ;
typedef typename output_type::entries_type entries_type ;
static_assert( entries_type::rank == 1
, "Graph entries view must be rank one" );
typedef View< typename output_type::size_type [] ,
typename output_type::array_layout ,
typename output_type::execution_space > work_type ;
output_type output ;
// Create the row map:
const size_t length = input.size();
{
work_type row_work( "tmp" , length + 1 );
typename work_type::HostMirror row_work_host =
create_mirror_view( row_work );
size_t sum = 0 ;
row_work_host[0] = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
row_work_host[i+1] = sum += input[i].size();
}
deep_copy( row_work , row_work_host );
output.entries = entries_type( label , sum );
output.row_map = row_work ;
}
// Fill in the entries:
{
typename entries_type::HostMirror host_entries =
create_mirror_view( output.entries );
size_t sum = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
host_entries( sum ) = input[i][j] ;
}
}
deep_copy( output.entries , host_entries );
}
return output ;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_CRSARRAY_FACTORY_HPP */

View File

@ -1,101 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_UnorderedMap.hpp>
namespace Kokkos { namespace Impl {
uint32_t find_hash_size(uint32_t size)
{
if (size == 0u) return 0u;
// these primes try to preserve randomness of hash
static const uint32_t primes [] = {
3, 7, 13, 23, 53, 97, 193, 389, 769, 1543
, 2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539
, 3727, 3911, 4441 , 4787 , 5119 , 5471 , 5801 , 6143 , 6521 , 6827
, 7177 , 7517 , 7853 , 8887 , 9587 , 10243 , 10937 , 11617 , 12289
, 12967 , 13649 , 14341 , 15013 , 15727
, 17749 , 19121 , 20479 , 21859 , 23209 , 24593 , 25939 , 27329
, 28669 , 30047 , 31469 , 35507 , 38231 , 40961 , 43711 , 46439
, 49157 , 51893 , 54617 , 57347 , 60077 , 62801 , 70583 , 75619
, 80669 , 85703 , 90749 , 95783 , 100823 , 105871 , 110909 , 115963
, 120997 , 126031 , 141157 , 151237 , 161323 , 171401 , 181499 , 191579
, 201653 , 211741 , 221813 , 231893 , 241979 , 252079
, 282311 , 302483 , 322649 , 342803 , 362969 , 383143 , 403301 , 423457
, 443629 , 463787 , 483953 , 504121 , 564617 , 604949 , 645313 , 685609
, 725939 , 766273 , 806609 , 846931 , 887261 , 927587 , 967919 , 1008239
, 1123477 , 1198397 , 1273289 , 1348177 , 1423067 , 1497983 , 1572869
, 1647761 , 1722667 , 1797581 , 1872461 , 1947359 , 2022253
, 2246953 , 2396759 , 2546543 , 2696363 , 2846161 , 2995973 , 3145739
, 3295541 , 3445357 , 3595117 , 3744941 , 3894707 , 4044503
, 4493921 , 4793501 , 5093089 , 5392679 , 5692279 , 5991883 , 6291469
, 6591059 , 6890641 , 7190243 , 7489829 , 7789447 , 8089033
, 8987807 , 9586981 , 10186177 , 10785371 , 11384539 , 11983729
, 12582917 , 13182109 , 13781291 , 14380469 , 14979667 , 15578861
, 16178053 , 17895707 , 19014187 , 20132683 , 21251141 , 22369661
, 23488103 , 24606583 , 25725083 , 26843549 , 27962027 , 29080529
, 30198989 , 31317469 , 32435981 , 35791397 , 38028379 , 40265327
, 42502283 , 44739259 , 46976221 , 49213237 , 51450131 , 53687099
, 55924061 , 58161041 , 60397993 , 62634959 , 64871921
, 71582857 , 76056727 , 80530643 , 85004567 , 89478503 , 93952427
, 98426347 , 102900263 , 107374217 , 111848111 , 116322053 , 120795971
, 125269877 , 129743807 , 143165587 , 152113427 , 161061283 , 170009141
, 178956983 , 187904819 , 196852693 , 205800547 , 214748383 , 223696237
, 232644089 , 241591943 , 250539763 , 259487603 , 268435399
};
const uint32_t num_primes = sizeof(primes)/sizeof(uint32_t);
uint32_t hsize = primes[num_primes-1] ;
for (uint32_t i = 0; i < num_primes; ++i) {
if (size <= primes[i]) {
hsize = primes[i];
break;
}
}
return hsize;
}
}} // namespace Kokkos::Impl

View File

@ -1,297 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
#define KOKKOS_UNORDERED_MAP_IMPL_HPP
#include <Kokkos_Core_fwd.hpp>
#include <stdint.h>
#include <cstdio>
#include <climits>
#include <iostream>
#include <iomanip>
namespace Kokkos { namespace Impl {
uint32_t find_hash_size( uint32_t size );
template <typename Map>
struct UnorderedMapRehash
{
typedef Map map_type;
typedef typename map_type::const_map_type const_map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
map_type m_dst;
const_map_type m_src;
UnorderedMapRehash( map_type const& dst, const_map_type const& src)
: m_dst(dst), m_src(src)
{}
void apply() const
{
parallel_for(m_src.capacity(), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const
{
if ( m_src.valid_at(i) )
m_dst.insert(m_src.key_at(i), m_src.value_at(i));
}
};
template <typename UMap>
struct UnorderedMapErase
{
typedef UMap map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
typedef typename map_type::key_type key_type;
typedef typename map_type::impl_value_type value_type;
map_type m_map;
UnorderedMapErase( map_type const& map)
: m_map(map)
{}
void apply() const
{
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()( size_type i ) const
{
const size_type invalid_index = map_type::invalid_index;
size_type curr = m_map.m_hash_lists(i);
size_type next = invalid_index;
// remove erased head of the linked-list
while (curr != invalid_index && !m_map.valid_at(curr)) {
next = m_map.m_next_index[curr];
m_map.m_next_index[curr] = invalid_index;
m_map.m_keys[curr] = key_type();
if (m_map.is_set) m_map.m_values[curr] = value_type();
curr = next;
m_map.m_hash_lists(i) = next;
}
// if the list is non-empty and the head is valid
if (curr != invalid_index && m_map.valid_at(curr) ) {
size_type prev = curr;
curr = m_map.m_next_index[prev];
while (curr != invalid_index) {
next = m_map.m_next_index[curr];
if (m_map.valid_at(curr)) {
prev = curr;
}
else {
// remove curr from list
m_map.m_next_index[prev] = next;
m_map.m_next_index[curr] = invalid_index;
m_map.m_keys[curr] = key_type();
if (map_type::is_set) m_map.m_values[curr] = value_type();
}
curr = next;
}
}
}
};
template <typename UMap>
struct UnorderedMapHistogram
{
typedef UMap map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
typedef View<int[100], execution_space> histogram_view;
typedef typename histogram_view::HostMirror host_histogram_view;
map_type m_map;
histogram_view m_length;
histogram_view m_distance;
histogram_view m_block_distance;
UnorderedMapHistogram( map_type const& map)
: m_map(map)
, m_length("UnorderedMap Histogram")
, m_distance("UnorderedMap Histogram")
, m_block_distance("UnorderedMap Histogram")
{}
void calculate()
{
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
}
void clear()
{
Kokkos::deep_copy(m_length, 0);
Kokkos::deep_copy(m_distance, 0);
Kokkos::deep_copy(m_block_distance, 0);
}
void print_length(std::ostream &out)
{
host_histogram_view host_copy = create_mirror_view(m_length);
Kokkos::deep_copy(host_copy, m_length);
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
{
out << host_copy[i] << " , ";
}
out << "\b\b\b " << std::endl;
}
void print_distance(std::ostream &out)
{
host_histogram_view host_copy = create_mirror_view(m_distance);
Kokkos::deep_copy(host_copy, m_distance);
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
{
out << host_copy[i] << " , ";
}
out << "\b\b\b " << std::endl;
}
void print_block_distance(std::ostream &out)
{
host_histogram_view host_copy = create_mirror_view(m_block_distance);
Kokkos::deep_copy(host_copy, m_block_distance);
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
{
out << host_copy[i] << " , ";
}
out << "\b\b\b " << std::endl;
}
KOKKOS_INLINE_FUNCTION
void operator()( size_type i ) const
{
const size_type invalid_index = map_type::invalid_index;
uint32_t length = 0;
size_type min_index = ~0u, max_index = 0;
for (size_type curr = m_map.m_hash_lists(i); curr != invalid_index; curr = m_map.m_next_index[curr]) {
++length;
min_index = (curr < min_index) ? curr : min_index;
max_index = (max_index < curr) ? curr : max_index;
}
size_type distance = (0u < length) ? max_index - min_index : 0u;
size_type blocks = (0u < length) ? max_index/32u - min_index/32u : 0u;
// normalize data
length = length < 100u ? length : 99u;
distance = distance < 100u ? distance : 99u;
blocks = blocks < 100u ? blocks : 99u;
if (0u < length)
{
atomic_fetch_add( &m_length(length), 1);
atomic_fetch_add( &m_distance(distance), 1);
atomic_fetch_add( &m_block_distance(blocks), 1);
}
}
};
template <typename UMap>
struct UnorderedMapPrint
{
typedef UMap map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
map_type m_map;
UnorderedMapPrint( map_type const& map)
: m_map(map)
{}
void apply()
{
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()( size_type i ) const
{
const size_type invalid_index = map_type::invalid_index;
uint32_t list = m_map.m_hash_lists(i);
for (size_type curr = list, ii=0; curr != invalid_index; curr = m_map.m_next_index[curr], ++ii) {
printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), m_map.value_at(curr));
}
}
};
template <typename DKey, typename DValue, typename SKey, typename SValue>
struct UnorderedMapCanAssign : public false_ {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<Key,Value,Key,Value> : public true_ {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key,Value,Key,Value> : public true_ {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key,const Value,Key,Value> : public true_ {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key,const Value,const Key,Value> : public true_ {};
}} //Kokkos::Impl
#endif // KOKKOS_UNORDERED_MAP_IMPL_HPP

View File

@ -1,92 +0,0 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests
default: build_all
echo "End Build"
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests
TEST_TARGETS =
TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Serial
TEST_TARGETS += test-serial
endif
KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda
KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads
KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP
KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial
test-cuda: KokkosContainers_UnitTest_Cuda
./KokkosContainers_UnitTest_Cuda
test-threads: KokkosContainers_UnitTest_Threads
./KokkosContainers_UnitTest_Threads
test-openmp: KokkosContainers_UnitTest_OpenMP
./KokkosContainers_UnitTest_OpenMP
test-serial: KokkosContainers_UnitTest_Serial
./KokkosContainers_UnitTest_Serial
build_all: $(TARGETS)
test: $(TEST_TARGETS)
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,285 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_BITSET_HPP
#define KOKKOS_TEST_BITSET_HPP
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
template <typename Bitset, bool Set>
struct TestBitset
{
typedef Bitset bitset_type;
typedef typename bitset_type::execution_space execution_space;
typedef uint32_t value_type;
bitset_type m_bitset;
TestBitset( bitset_type const& bitset)
: m_bitset(bitset)
{}
unsigned testit(unsigned collisions)
{
execution_space::fence();
unsigned count = 0;
Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, count);
return count;
}
KOKKOS_INLINE_FUNCTION
void init( value_type & v ) const { v = 0; }
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & dst, const volatile value_type & src ) const
{ dst += src; }
KOKKOS_INLINE_FUNCTION
void operator()(uint32_t i, value_type & v) const
{
i = i % m_bitset.size();
if (Set) {
if (m_bitset.set(i)) {
if (m_bitset.test(i)) ++v;
}
}
else {
if (m_bitset.reset(i)) {
if (!m_bitset.test(i)) ++v;
}
}
}
};
template <typename Bitset>
struct TestBitsetTest
{
typedef Bitset bitset_type;
typedef typename bitset_type::execution_space execution_space;
typedef uint32_t value_type;
bitset_type m_bitset;
TestBitsetTest( bitset_type const& bitset)
: m_bitset(bitset)
{}
unsigned testit()
{
execution_space::fence();
unsigned count = 0;
Kokkos::parallel_reduce( m_bitset.size(), *this, count);
return count;
}
KOKKOS_INLINE_FUNCTION
void init( value_type & v ) const { v = 0; }
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & dst, const volatile value_type & src ) const
{ dst += src; }
KOKKOS_INLINE_FUNCTION
void operator()(uint32_t i, value_type & v) const
{
if (m_bitset.test( i )) ++v;
}
};
template <typename Bitset, bool Set>
struct TestBitsetAny
{
typedef Bitset bitset_type;
typedef typename bitset_type::execution_space execution_space;
typedef uint32_t value_type;
bitset_type m_bitset;
TestBitsetAny( bitset_type const& bitset)
: m_bitset(bitset)
{}
unsigned testit()
{
execution_space::fence();
unsigned count = 0;
Kokkos::parallel_reduce( m_bitset.size(), *this, count);
return count;
}
KOKKOS_INLINE_FUNCTION
void init( value_type & v ) const { v = 0; }
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & dst, const volatile value_type & src ) const
{ dst += src; }
KOKKOS_INLINE_FUNCTION
void operator()(uint32_t i, value_type & v) const
{
bool result = false;
unsigned attempts = 0;
uint32_t hint = (i >> 4) << 4;
while (attempts < m_bitset.max_hint()) {
if (Set) {
Kokkos::tie(result, hint) = m_bitset.find_any_unset_near(hint, i);
if (result && m_bitset.set(hint)) {
++v;
break;
}
else if (!result) {
++attempts;
}
}
else {
Kokkos::tie(result, hint) = m_bitset.find_any_set_near(hint, i);
if (result && m_bitset.reset(hint)) {
++v;
break;
}
else if (!result) {
++attempts;
}
}
}
}
};
} // namespace Impl
template <typename Device>
void test_bitset()
{
typedef Kokkos::Bitset< Device > bitset_type;
typedef Kokkos::ConstBitset< Device > const_bitset_type;
//unsigned test_sizes[] = { 0u, 1000u, 1u<<14, 1u<<16, 10000001 };
unsigned test_sizes[] = { 1000u, 1u<<14, 1u<<16, 10000001 };
for (int i=0, end = sizeof(test_sizes)/sizeof(unsigned); i<end; ++i) {
//std::cout << "Bitset " << test_sizes[i] << std::endl;
bitset_type bitset(test_sizes[i]);
//std::cout << " Check inital count " << std::endl;
// nothing should be set
{
Impl::TestBitsetTest< bitset_type > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ(0u, count);
EXPECT_EQ(count, bitset.count());
}
//std::cout << " Check set() " << std::endl;
bitset.set();
// everything should be set
{
Impl::TestBitsetTest< const_bitset_type > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ(bitset.size(), count);
EXPECT_EQ(count, bitset.count());
}
//std::cout << " Check reset() " << std::endl;
bitset.reset();
EXPECT_EQ(0u, bitset.count());
//std::cout << " Check set(i) " << std::endl;
// test setting bits
{
Impl::TestBitset< bitset_type, true > f(bitset);
uint32_t count = f.testit(10u);
EXPECT_EQ( bitset.size(), bitset.count());
EXPECT_EQ( bitset.size(), count );
}
//std::cout << " Check reset(i) " << std::endl;
// test resetting bits
{
Impl::TestBitset< bitset_type, false > f(bitset);
uint32_t count = f.testit(10u);
EXPECT_EQ( bitset.size(), count);
EXPECT_EQ( 0u, bitset.count() );
}
//std::cout << " Check find_any_set(i) " << std::endl;
// test setting any bits
{
Impl::TestBitsetAny< bitset_type, true > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ( bitset.size(), bitset.count());
EXPECT_EQ( bitset.size(), count );
}
//std::cout << " Check find_any_unset(i) " << std::endl;
// test resetting any bits
{
Impl::TestBitsetAny< bitset_type, false > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ( bitset.size(), count);
EXPECT_EQ( 0u, bitset.count() );
}
}
}
} // namespace Test
#endif //KOKKOS_TEST_BITSET_HPP

View File

@ -1,264 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_COMPLEX_HPP
#define KOKKOS_TEST_COMPLEX_HPP
//#include <Kokkos_Complex.hpp>
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
template <typename RealType>
void testComplexConstructors () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1;
complex_type z2 (0.0, 0.0);
complex_type z3 (1.0, 0.0);
complex_type z4 (0.0, 1.0);
complex_type z5 (-1.0, -2.0);
ASSERT_TRUE( z1 == z2 );
ASSERT_TRUE( z1 != z3 );
ASSERT_TRUE( z1 != z4 );
ASSERT_TRUE( z1 != z5 );
ASSERT_TRUE( z2 != z3 );
ASSERT_TRUE( z2 != z4 );
ASSERT_TRUE( z2 != z5 );
ASSERT_TRUE( z3 != z4 );
ASSERT_TRUE( z3 != z5 );
complex_type z6 (-1.0, -2.0);
ASSERT_TRUE( z5 == z6 );
// Make sure that complex has value semantics, in particular, that
// equality tests use values and not pointers, so that
// reassignment actually changes the value.
z1 = complex_type (-3.0, -4.0);
ASSERT_TRUE( z1.real () == -3.0 );
ASSERT_TRUE( z1.imag () == -4.0 );
ASSERT_TRUE( z1 != z2 );
complex_type z7 (1.0);
ASSERT_TRUE( z3 == z7 );
ASSERT_TRUE( z7 == 1.0 );
ASSERT_TRUE( z7 != -1.0 );
z7 = complex_type (5.0);
ASSERT_TRUE( z7.real () == 5.0 );
ASSERT_TRUE( z7.imag () == 0.0 );
}
template <typename RealType>
void testPlus () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 + z2;
ASSERT_TRUE( z3 == complex_type (0.0, 0.0) );
}
template <typename RealType>
void testMinus () {
typedef Kokkos::complex<RealType> complex_type;
// Test binary minus.
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 - z2;
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
// Test unary minus.
complex_type z4 (3.0, -4.0);
ASSERT_TRUE( -z1 == complex_type (-3.0, 4.0) );
}
template <typename RealType>
void testTimes () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 - z2;
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
// Test unary minus.
complex_type z4 (3.0, -4.0);
ASSERT_TRUE( z4 == complex_type (3.0, -4.0) );
ASSERT_TRUE( -z4 == complex_type (-3.0, 4.0) );
ASSERT_TRUE( z4 == -complex_type (-3.0, 4.0) );
}
template <typename RealType>
void testDivide () {
typedef Kokkos::complex<RealType> complex_type;
// Test division of a complex number by a real number.
complex_type z1 (1.0, -1.0);
complex_type z2 (1.0 / 2.0, -1.0 / 2.0);
ASSERT_TRUE( z1 / 2.0 == z2 );
// (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i))
// (-1+2i)(1+i) == -3 + i
complex_type z3 (-1.0, 2.0);
complex_type z4 (1.0, -1.0);
complex_type z5 (-3.0, 1.0);
ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 );
// Test division of a complex number by a complex number.
// This assumes that RealType is a floating-point type.
complex_type z6 (Kokkos::real (z5) / 2.0,
Kokkos::imag (z5) / 2.0);
complex_type z7 = z3 / z4;
ASSERT_TRUE( z7 == z6 );
}
template <typename RealType>
void testOutsideKernel () {
testComplexConstructors<RealType> ();
testPlus<RealType> ();
testTimes<RealType> ();
testDivide<RealType> ();
}
template<typename RealType, typename Device>
void testCreateView () {
typedef Kokkos::complex<RealType> complex_type;
Kokkos::View<complex_type*, Device> x ("x", 10);
ASSERT_TRUE( x.dimension_0 () == 10 );
// Test that View assignment works.
Kokkos::View<complex_type*, Device> x_nonconst = x;
Kokkos::View<const complex_type*, Device> x_const = x;
}
template<typename RealType, typename Device>
class Fill {
public:
typedef typename Device::execution_space execution_space;
typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type;
typedef typename view_type::size_type size_type;
KOKKOS_INLINE_FUNCTION
void operator () (const size_type i) const {
x_(i) = val_;
}
Fill (const view_type& x, const Kokkos::complex<RealType>& val) :
x_ (x), val_ (val)
{}
private:
view_type x_;
const Kokkos::complex<RealType> val_;
};
template<typename RealType, typename Device>
class Sum {
public:
typedef typename Device::execution_space execution_space;
typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type;
typedef typename view_type::size_type size_type;
typedef Kokkos::complex<RealType> value_type;
KOKKOS_INLINE_FUNCTION
void operator () (const size_type i, Kokkos::complex<RealType>& sum) const {
sum += x_(i);
}
Sum (const view_type& x) : x_ (x) {}
private:
view_type x_;
};
template<typename RealType, typename Device>
void testInsideKernel () {
typedef Kokkos::complex<RealType> complex_type;
typedef Kokkos::View<complex_type*, Device> view_type;
typedef typename view_type::size_type size_type;
const size_type N = 1000;
view_type x ("x", N);
ASSERT_TRUE( x.dimension_0 () == N );
// Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) {
// result += x[i];
// });
Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0)));
complex_type sum;
Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum);
ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 );
}
} // namespace Impl
template <typename Device>
void testComplex ()
{
Impl::testOutsideKernel<float> ();
Impl::testOutsideKernel<double> ();
Impl::testCreateView<float, Device> ();
Impl::testCreateView<double, Device> ();
Impl::testInsideKernel<float, Device> ();
Impl::testInsideKernel<double, Device> ();
}
} // namespace Test
#endif // KOKKOS_TEST_COMPLEX_HPP

View File

@ -1,206 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <iomanip>
#include <stdint.h>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_CUDA
namespace Test {
class cuda : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase()
{
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};
TEST_F( cuda , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >();
}
void cuda_test_insert_close( uint32_t num_nodes
, uint32_t num_inserts
, uint32_t num_duplicates
)
{
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, true);
}
void cuda_test_insert_far( uint32_t num_nodes
, uint32_t num_inserts
, uint32_t num_duplicates
)
{
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, false);
}
void cuda_test_failed_insert( uint32_t num_nodes )
{
test_failed_insert< Kokkos::Cuda >( num_nodes );
}
void cuda_test_deep_copy( uint32_t num_nodes )
{
test_deep_copy< Kokkos::Cuda >( num_nodes );
}
void cuda_test_vector_combinations(unsigned int size)
{
test_vector_combinations<int,Kokkos::Cuda>(size);
}
void cuda_test_dualview_combinations(unsigned int size)
{
test_dualview_combinations<int,Kokkos::Cuda>(size);
}
void cuda_test_segmented_view(unsigned int size)
{
test_segmented_view<double,Kokkos::Cuda>(size);
}
void cuda_test_bitset()
{
test_bitset<Kokkos::Cuda>();
}
/*TEST_F( cuda, bitset )
{
cuda_test_bitset();
}*/
#define CUDA_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat ) \
TEST_F( cuda, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_insert_##name(num_nodes,num_inserts,num_duplicates); \
}
#define CUDA_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_failed_insert(num_nodes); \
}
#define CUDA_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_assignment_operators(num_nodes); \
}
#define CUDA_DEEP_COPY( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_deep_copy(num_nodes); \
}
#define CUDA_VECTOR_COMBINE_TEST( size ) \
TEST_F( cuda, vector_combination##size##x) { \
cuda_test_vector_combinations(size); \
}
#define CUDA_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( cuda, dualview_combination##size##x) { \
cuda_test_dualview_combinations(size); \
}
#define CUDA_SEGMENTEDVIEW_TEST( size ) \
TEST_F( cuda, segmentedview_##size##x) { \
cuda_test_segmented_view(size); \
}
CUDA_DUALVIEW_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 3057 )
CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)
CUDA_DEEP_COPY( 10000, 1 )
CUDA_FAILED_INSERT_TEST( 10000, 1000 )
CUDA_SEGMENTEDVIEW_TEST( 200 )
#undef CUDA_INSERT_TEST
#undef CUDA_FAILED_INSERT_TEST
#undef CUDA_ASSIGNEMENT_TEST
#undef CUDA_DEEP_COPY
#undef CUDA_VECTOR_COMBINE_TEST
#undef CUDA_DUALVIEW_COMBINE_TEST
#undef CUDA_SEGMENTEDVIEW_TEST
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */

View File

@ -1,121 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_DUALVIEW_HPP
#define KOKKOS_TEST_DUALVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
template <typename Scalar, class Device>
struct test_dualview_combinations
{
typedef test_dualview_combinations<Scalar,Device> self_type;
typedef Scalar scalar_type;
typedef Device execution_space;
Scalar reference;
Scalar result;
template <typename ViewType>
Scalar run_me(unsigned int n,unsigned int m){
if(n<10) n = 10;
if(m<3) m = 3;
ViewType a("A",n,m);
Kokkos::deep_copy( a.d_view , 1 );
a.template modify<typename ViewType::execution_space>();
a.template sync<typename ViewType::host_mirror_space>();
a.h_view(5,1) = 3;
a.h_view(6,1) = 4;
a.h_view(7,2) = 5;
a.template modify<typename ViewType::host_mirror_space>();
ViewType b = Kokkos::subview(a,std::pair<unsigned int, unsigned int>(6,9),std::pair<unsigned int, unsigned int>(0,1));
a.template sync<typename ViewType::execution_space>();
b.template modify<typename ViewType::execution_space>();
Kokkos::deep_copy( b.d_view , 2 );
a.template sync<typename ViewType::host_mirror_space>();
Scalar count = 0;
for(unsigned int i = 0; i<a.d_view.dimension_0(); i++)
for(unsigned int j = 0; j<a.d_view.dimension_1(); j++)
count += a.h_view(i,j);
return count - a.d_view.dimension_0()*a.d_view.dimension_1()-2-4-3*2;
}
test_dualview_combinations(unsigned int size)
{
result = run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >(size,3);
}
};
} // namespace Impl
template <typename Scalar, typename Device>
void test_dualview_combinations(unsigned int size)
{
Impl::test_dualview_combinations<Scalar,Device> test(size);
ASSERT_EQ( test.result,0);
}
} // namespace Test
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP

View File

@ -1,162 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
//----------------------------------------------------------------------------
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
#include <TestComplex.hpp>
#include <iomanip>
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
unsigned threads_count = 4 ;
if ( Kokkos::hwloc::available() ) {
threads_count = Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa();
}
Kokkos::OpenMP::initialize( threads_count );
}
static void TearDownTestCase()
{
Kokkos::OpenMP::finalize();
}
};
TEST_F( openmp, complex )
{
testComplex<Kokkos::OpenMP> ();
}
TEST_F( openmp, bitset )
{
test_bitset<Kokkos::OpenMP>();
}
TEST_F( openmp , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >();
TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
}
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( openmp, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::OpenMP>(num_nodes,num_inserts,num_duplicates, near); \
}
#define OPENMP_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::OpenMP>(num_nodes); \
}
#define OPENMP_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::OpenMP>(num_nodes); \
}
#define OPENMP_DEEP_COPY( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::OpenMP>(num_nodes); \
}
#define OPENMP_VECTOR_COMBINE_TEST( size ) \
TEST_F( openmp, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::OpenMP>(size); \
}
#define OPENMP_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( openmp, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
}
#define OPENMP_SEGMENTEDVIEW_TEST( size ) \
TEST_F( openmp, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::OpenMP>(size); \
}
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
OPENMP_DEEP_COPY( 10000, 1 )
OPENMP_VECTOR_COMBINE_TEST( 10 )
OPENMP_VECTOR_COMBINE_TEST( 3057 )
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
OPENMP_SEGMENTEDVIEW_TEST( 10000 )
#undef OPENMP_INSERT_TEST
#undef OPENMP_FAILED_INSERT_TEST
#undef OPENMP_ASSIGNEMENT_TEST
#undef OPENMP_DEEP_COPY
#undef OPENMP_VECTOR_COMBINE_TEST
#undef OPENMP_DUALVIEW_COMBINE_TEST
#undef OPENMP_SEGMENTEDVIEW_TEST
#endif
} // namespace test

View File

@ -1,708 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP
#define KOKKOS_TEST_SEGMENTEDVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <Kokkos_Core.hpp>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
#include <Kokkos_SegmentedView.hpp>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
struct GrowTest;
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 1> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx+team_member.team_size());
value += team_idx + team_member.team_rank();
if((a.dimension_0()>team_idx+team_member.team_rank()) &&
(a.dimension(0)>team_idx+team_member.team_rank()))
a(team_idx+team_member.team_rank()) = team_idx+team_member.team_rank();
}
};
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 2> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx+ team_member.team_size());
for( typename ExecutionSpace::size_type k=0;k<7;k++)
value += team_idx + team_member.team_rank() + 13*k;
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) {
a(team_idx+ team_member.team_rank(),k) =
team_idx+ team_member.team_rank() + 13*k;
}
}
}
};
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 3> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx+ team_member.team_size());
for( typename ExecutionSpace::size_type k=0;k<7;k++)
for( typename ExecutionSpace::size_type l=0;l<3;l++)
value += team_idx + team_member.team_rank() + 13*k + 3*l;
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
a(team_idx+ team_member.team_rank(),k,l) =
team_idx+ team_member.team_rank() + 13*k + 3*l;
}
}
};
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 4> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx+ team_member.team_size());
for( typename ExecutionSpace::size_type k=0;k<7;k++)
for( typename ExecutionSpace::size_type l=0;l<3;l++)
for( typename ExecutionSpace::size_type m=0;m<2;m++)
value += team_idx + team_member.team_rank() + 13*k + 3*l + 7*m;
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
a(team_idx+ team_member.team_rank(),k,l,m) =
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m;
}
}
};
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 5> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx+ team_member.team_size());
for( typename ExecutionSpace::size_type k=0;k<7;k++)
for( typename ExecutionSpace::size_type l=0;l<3;l++)
for( typename ExecutionSpace::size_type m=0;m<2;m++)
for( typename ExecutionSpace::size_type n=0;n<3;n++)
value +=
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
a(team_idx+ team_member.team_rank(),k,l,m,n) =
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
}
}
};
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 6> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx+ team_member.team_size());
for( typename ExecutionSpace::size_type k=0;k<7;k++)
for( typename ExecutionSpace::size_type l=0;l<3;l++)
for( typename ExecutionSpace::size_type m=0;m<2;m++)
for( typename ExecutionSpace::size_type n=0;n<3;n++)
for( typename ExecutionSpace::size_type o=0;o<2;o++)
value +=
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
a(team_idx+ team_member.team_rank(),k,l,m,n,o) =
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
}
}
};
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 7> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx+ team_member.team_size());
for( typename ExecutionSpace::size_type k=0;k<7;k++)
for( typename ExecutionSpace::size_type l=0;l<3;l++)
for( typename ExecutionSpace::size_type m=0;m<2;m++)
for( typename ExecutionSpace::size_type n=0;n<3;n++)
for( typename ExecutionSpace::size_type o=0;o<2;o++)
for( typename ExecutionSpace::size_type p=0;p<4;p++)
value +=
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p) =
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
}
}
};
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 8> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
GrowTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
a.grow(team_member , team_idx + team_member.team_size());
for( typename ExecutionSpace::size_type k=0;k<7;k++)
for( typename ExecutionSpace::size_type l=0;l<3;l++)
for( typename ExecutionSpace::size_type m=0;m<2;m++)
for( typename ExecutionSpace::size_type n=0;n<3;n++)
for( typename ExecutionSpace::size_type o=0;o<2;o++)
for( typename ExecutionSpace::size_type p=0;p<4;p++)
for( typename ExecutionSpace::size_type q=0;q<3;q++)
value +=
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q) =
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
}
}
};
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
struct VerifyTest;
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 1> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
value += a(team_idx+ team_member.team_rank());
}
}
};
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 2> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
value += a(team_idx+ team_member.team_rank(),k);
}
}
};
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 3> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
value += a(team_idx+ team_member.team_rank(),k,l);
}
}
};
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 4> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
value += a(team_idx+ team_member.team_rank(),k,l,m);
}
}
};
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 5> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
value += a(team_idx+ team_member.team_rank(),k,l,m,n);
}
}
};
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 6> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o);
}
}
};
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 7> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p);
}
}
};
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 8> {
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
typedef typename Policy::member_type team_type;
typedef double value_type;
ViewType a;
VerifyTest(ViewType in):a(in) {}
KOKKOS_INLINE_FUNCTION
void operator() (team_type team_member, double& value) const {
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
(a.dimension(0)>team_idx+ team_member.team_rank())) {
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q);
}
}
};
template <typename Scalar, class ExecutionSpace>
struct test_segmented_view
{
typedef test_segmented_view<Scalar,ExecutionSpace> self_type;
typedef Scalar scalar_type;
typedef ExecutionSpace execution_space;
typedef Kokkos::TeamPolicy<execution_space> Policy;
double result;
double reference;
template <class ViewType>
void run_me(ViewType a, int max_length){
const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) );
const int nteams = max_length/team_size;
reference = 0;
result = 0;
Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference);
Kokkos::fence();
Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result);
Kokkos::fence();
}
test_segmented_view(unsigned int size,int rank)
{
reference = 0;
result = 0;
const int dim_1 = 7;
const int dim_2 = 3;
const int dim_3 = 2;
const int dim_4 = 3;
const int dim_5 = 2;
const int dim_6 = 4;
//const int dim_7 = 3;
if(rank==1) {
typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view;
run_me< rank1_view >(rank1_view("Rank1",128,size), size);
}
if(rank==2) {
typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view;
run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size);
}
if(rank==3) {
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2],Kokkos::LayoutRight,ExecutionSpace> rank3_view;
run_me< rank3_view >(rank3_view("Rank3",128,size), size);
}
if(rank==4) {
typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view;
run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size);
}
if(rank==5) {
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view;
run_me< rank5_view >(rank5_view("Rank5",128,size), size);
}
if(rank==6) {
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view;
run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size);
}
if(rank==7) {
typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view;
run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size);
}
if(rank==8) {
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view;
run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size);
}
}
};
} // namespace Impl
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int size)
{
{
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type;
view_type a("A",128,size,7,3,2,3);
double reference;
Impl::GrowTest<view_type,ExecutionSpace> f(a);
const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f );
const int nteams = (size+team_size-1)/team_size;
Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference);
size_t real_size = ((size+127)/128)*128;
ASSERT_EQ(real_size,a.dimension_0());
ASSERT_EQ(7,a.dimension_1());
ASSERT_EQ(3,a.dimension_2());
ASSERT_EQ(2,a.dimension_3());
ASSERT_EQ(3,a.dimension_4());
ASSERT_EQ(2,a.dimension_5());
ASSERT_EQ(4,a.dimension_6());
ASSERT_EQ(3,a.dimension_7());
ASSERT_EQ(real_size,a.dimension(0));
ASSERT_EQ(7,a.dimension(1));
ASSERT_EQ(3,a.dimension(2));
ASSERT_EQ(2,a.dimension(3));
ASSERT_EQ(3,a.dimension(4));
ASSERT_EQ(2,a.dimension(5));
ASSERT_EQ(4,a.dimension(6));
ASSERT_EQ(3,a.dimension(7));
ASSERT_EQ(8,a.Rank);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8);
ASSERT_EQ(test.reference,test.result);
}
}
} // namespace Test
#else
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int ) {}
#endif
#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */

View File

@ -1,158 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if ! defined(KOKKOS_HAVE_SERIAL)
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
#else
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
#include <TestComplex.hpp>
#include <iomanip>
namespace Test {
class serial : public ::testing::Test {
protected:
static void SetUpTestCase () {
std::cout << std::setprecision(5) << std::scientific;
Kokkos::Serial::initialize ();
}
static void TearDownTestCase () {
Kokkos::Serial::finalize ();
}
};
TEST_F( serial , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >();
}
TEST_F( serial, complex )
{
testComplex<Kokkos::Serial> ();
}
TEST_F( serial, bitset )
{
test_bitset<Kokkos::Serial> ();
}
#define SERIAL_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( serial, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::Serial> (num_nodes, num_inserts, num_duplicates, near); \
}
#define SERIAL_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::Serial> (num_nodes); \
}
#define SERIAL_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::Serial> (num_nodes); \
}
#define SERIAL_DEEP_COPY( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::Serial> (num_nodes); \
}
#define SERIAL_VECTOR_COMBINE_TEST( size ) \
TEST_F( serial, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::Serial>(size); \
}
#define SERIAL_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( serial, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::Serial>(size); \
}
#define SERIAL_SEGMENTEDVIEW_TEST( size ) \
TEST_F( serial, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::Serial>(size); \
}
SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
SERIAL_DEEP_COPY( 10000, 1 )
SERIAL_VECTOR_COMBINE_TEST( 10 )
SERIAL_VECTOR_COMBINE_TEST( 3057 )
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
SERIAL_SEGMENTEDVIEW_TEST( 10000 )
#undef SERIAL_INSERT_TEST
#undef SERIAL_FAILED_INSERT_TEST
#undef SERIAL_ASSIGNEMENT_TEST
#undef SERIAL_DEEP_COPY
#undef SERIAL_VECTOR_COMBINE_TEST
#undef SERIAL_DUALVIEW_COMBINE_TEST
#undef SERIAL_SEGMENTEDVIEW_TEST
} // namespace test
#endif // KOKKOS_HAVE_SERIAL

View File

@ -1,149 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <vector>
#include <Kokkos_StaticCrsGraph.hpp>
/*--------------------------------------------------------------------------*/
namespace TestStaticCrsGraph {
template< class Space >
void run_test_graph()
{
typedef Kokkos::StaticCrsGraph< unsigned , Space > dView ;
typedef typename dView::HostMirror hView ;
const unsigned LENGTH = 1000 ;
dView dx ;
hView hx ;
std::vector< std::vector< int > > graph( LENGTH );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
graph[i].reserve(8);
for ( size_t j = 0 ; j < 8 ; ++j ) {
graph[i].push_back( i + j * 3 );
}
}
dx = Kokkos::create_staticcrsgraph<dView>( "dx" , graph );
hx = Kokkos::create_mirror( dx );
ASSERT_EQ( hx.row_map.dimension_0() - 1 , LENGTH );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t begin = hx.row_map[i];
const size_t n = hx.row_map[i+1] - begin ;
ASSERT_EQ( n , graph[i].size() );
for ( size_t j = 0 ; j < n ; ++j ) {
ASSERT_EQ( (int) hx.entries( j + begin ) , graph[i][j] );
}
}
}
template< class Space >
void run_test_graph2()
{
typedef Kokkos::StaticCrsGraph< unsigned[3] , Space > dView ;
typedef typename dView::HostMirror hView ;
const unsigned LENGTH = 10 ;
std::vector< size_t > sizes( LENGTH );
size_t total_length = 0 ;
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
total_length += ( sizes[i] = 6 + i % 4 );
}
dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
hView hx = Kokkos::create_mirror( dx );
hView mx = Kokkos::create_mirror( dx );
ASSERT_EQ( (size_t) dx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) hx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) dx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) hx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) mx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) dx.entries.dimension_1() , (size_t) 3 );
ASSERT_EQ( (size_t) hx.entries.dimension_1() , (size_t) 3 );
ASSERT_EQ( (size_t) mx.entries.dimension_1() , (size_t) 3 );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t entry_begin = hx.row_map[i];
const size_t entry_end = hx.row_map[i+1];
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
hx.entries(j,0) = j + 1 ;
hx.entries(j,1) = j + 2 ;
hx.entries(j,2) = j + 3 ;
}
}
Kokkos::deep_copy( dx.entries , hx.entries );
Kokkos::deep_copy( mx.entries , dx.entries );
ASSERT_EQ( mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t entry_begin = mx.row_map[i];
const size_t entry_end = mx.row_map[i+1];
ASSERT_EQ( ( entry_end - entry_begin ) , sizes[i] );
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
ASSERT_EQ( (size_t) mx.entries( j , 0 ) , ( j + 1 ) );
ASSERT_EQ( (size_t) mx.entries( j , 1 ) , ( j + 2 ) );
ASSERT_EQ( (size_t) mx.entries( j , 2 ) , ( j + 3 ) );
}
}
}
} /* namespace TestStaticCrsGraph */

View File

@ -1,168 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
namespace Test {
class threads : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
unsigned num_threads = 4;
if (Kokkos::hwloc::available()) {
num_threads = Kokkos::hwloc::get_available_numa_count()
* Kokkos::hwloc::get_available_cores_per_numa()
// * Kokkos::hwloc::get_available_threads_per_core()
;
}
std::cout << "Threads: " << num_threads << std::endl;
Kokkos::Threads::initialize( num_threads );
}
static void TearDownTestCase()
{
Kokkos::Threads::finalize();
}
};
TEST_F( threads , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Threads >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Threads >();
}
/*TEST_F( threads, bitset )
{
test_bitset<Kokkos::Threads>();
}*/
#define THREADS_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( threads, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::Threads>(num_nodes,num_inserts,num_duplicates, near); \
}
#define THREADS_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( threads, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::Threads>(num_nodes); \
}
#define THREADS_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( threads, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::Threads>(num_nodes); \
}
#define THREADS_DEEP_COPY( num_nodes, repeat ) \
TEST_F( threads, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::Threads>(num_nodes); \
}
#define THREADS_VECTOR_COMBINE_TEST( size ) \
TEST_F( threads, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::Threads>(size); \
}
#define THREADS_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( threads, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::Threads>(size); \
}
#define THREADS_SEGMENTEDVIEW_TEST( size ) \
TEST_F( threads, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::Threads>(size); \
}
THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false)
THREADS_FAILED_INSERT_TEST( 10000, 1000 )
THREADS_DEEP_COPY( 10000, 1 )
THREADS_VECTOR_COMBINE_TEST( 10 )
THREADS_VECTOR_COMBINE_TEST( 3057 )
THREADS_DUALVIEW_COMBINE_TEST( 10 )
THREADS_SEGMENTEDVIEW_TEST( 10000 )
#undef THREADS_INSERT_TEST
#undef THREADS_FAILED_INSERT_TEST
#undef THREADS_ASSIGNEMENT_TEST
#undef THREADS_DEEP_COPY
#undef THREADS_VECTOR_COMBINE_TEST
#undef THREADS_DUALVIEW_COMBINE_TEST
#undef THREADS_SEGMENTEDVIEW_TEST
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */

View File

@ -1,313 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_UNORDERED_MAP_HPP
#define KOKKOS_TEST_UNORDERED_MAP_HPP
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
template <typename MapType, bool Near = false>
struct TestInsert
{
typedef MapType map_type;
typedef typename map_type::execution_space execution_space;
typedef uint32_t value_type;
map_type map;
uint32_t inserts;
uint32_t collisions;
TestInsert( map_type arg_map, uint32_t arg_inserts, uint32_t arg_collisions)
: map(arg_map)
, inserts(arg_inserts)
, collisions(arg_collisions)
{}
void testit( bool rehash_on_fail = true )
{
execution_space::fence();
uint32_t failed_count = 0;
do {
failed_count = 0;
Kokkos::parallel_reduce(inserts, *this, failed_count);
if (rehash_on_fail && failed_count > 0u) {
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + failed_count/collisions ;
map.rehash( new_capacity );
}
} while (rehash_on_fail && failed_count > 0u);
execution_space::fence();
}
KOKKOS_INLINE_FUNCTION
void init( value_type & failed_count ) const { failed_count = 0; }
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & failed_count, const volatile value_type & count ) const
{ failed_count += count; }
KOKKOS_INLINE_FUNCTION
void operator()(uint32_t i, value_type & failed_count) const
{
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
if (map.insert(key,i).failed()) ++failed_count;
}
};
template <typename MapType, bool Near>
struct TestErase
{
typedef TestErase<MapType, Near> self_type;
typedef MapType map_type;
typedef typename MapType::execution_space execution_space;
map_type m_map;
uint32_t m_num_erase;
uint32_t m_num_duplicates;
TestErase(map_type map, uint32_t num_erases, uint32_t num_duplicates)
: m_map(map)
, m_num_erase(num_erases)
, m_num_duplicates(num_duplicates)
{}
void testit()
{
execution_space::fence();
Kokkos::parallel_for(m_num_erase, *this);
execution_space::fence();
}
KOKKOS_INLINE_FUNCTION
void operator()(typename execution_space::size_type i) const
{
if (Near) {
m_map.erase(i/m_num_duplicates);
}
else {
m_map.erase(i%(m_num_erase/m_num_duplicates));
}
}
};
template <typename MapType>
struct TestFind
{
typedef MapType map_type;
typedef typename MapType::execution_space::execution_space execution_space;
typedef uint32_t value_type;
map_type m_map;
uint32_t m_num_insert;
uint32_t m_num_duplicates;
uint32_t m_max_key;
TestFind(map_type map, uint32_t num_inserts, uint32_t num_duplicates)
: m_map(map)
, m_num_insert(num_inserts)
, m_num_duplicates(num_duplicates)
, m_max_key( ((num_inserts + num_duplicates) - 1)/num_duplicates )
{}
void testit(value_type &errors)
{
execution_space::execution_space::fence();
Kokkos::parallel_reduce(m_map.capacity(), *this, errors);
execution_space::execution_space::fence();
}
KOKKOS_INLINE_FUNCTION
static void init( value_type & dst)
{
dst = 0;
}
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & dst, const volatile value_type & src)
{ dst += src; }
KOKKOS_INLINE_FUNCTION
void operator()(typename execution_space::size_type i, value_type & errors) const
{
const bool expect_to_find_i = (i < m_max_key);
const bool exists = m_map.exists(i);
if (expect_to_find_i && !exists) ++errors;
if (!expect_to_find_i && exists) ++errors;
}
};
} // namespace Impl
template <typename Device>
void test_insert( uint32_t num_nodes , uint32_t num_inserts , uint32_t num_duplicates , bool near )
{
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
typedef Kokkos::UnorderedMap<const uint32_t,const uint32_t, Device> const_map_type;
const uint32_t expected_inserts = (num_inserts + num_duplicates -1u) / num_duplicates;
map_type map;
map.rehash(num_nodes,false);
if (near) {
Impl::TestInsert<map_type,true> test_insert(map, num_inserts, num_duplicates);
test_insert.testit();
} else
{
Impl::TestInsert<map_type,false> test_insert(map, num_inserts, num_duplicates);
test_insert.testit();
}
const bool print_list = false;
if (print_list) {
Kokkos::Impl::UnorderedMapPrint<map_type> f(map);
f.apply();
}
const uint32_t map_size = map.size();
ASSERT_FALSE( map.failed_insert());
{
EXPECT_EQ(expected_inserts, map_size);
{
uint32_t find_errors = 0;
Impl::TestFind<const_map_type> test_find(map, num_inserts, num_duplicates);
test_find.testit(find_errors);
EXPECT_EQ( 0u, find_errors);
}
map.begin_erase();
Impl::TestErase<map_type,false> test_erase(map, num_inserts, num_duplicates);
test_erase.testit();
map.end_erase();
EXPECT_EQ(0u, map.size());
}
}
template <typename Device>
void test_failed_insert( uint32_t num_nodes)
{
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
map_type map(num_nodes);
Impl::TestInsert<map_type> test_insert(map, 2u*num_nodes, 1u);
test_insert.testit(false /*don't rehash on fail*/);
Device::execution_space::fence();
EXPECT_TRUE( map.failed_insert() );
}
template <typename Device>
void test_deep_copy( uint32_t num_nodes )
{
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
typedef Kokkos::UnorderedMap<const uint32_t, const uint32_t, Device> const_map_type;
typedef typename map_type::HostMirror host_map_type ;
// typedef Kokkos::UnorderedMap<uint32_t, uint32_t, typename Device::host_mirror_execution_space > host_map_type;
map_type map;
map.rehash(num_nodes,false);
{
Impl::TestInsert<map_type> test_insert(map, num_nodes, 1);
test_insert.testit();
ASSERT_EQ( map.size(), num_nodes);
ASSERT_FALSE( map.failed_insert() );
{
uint32_t find_errors = 0;
Impl::TestFind<map_type> test_find(map, num_nodes, 1);
test_find.testit(find_errors);
EXPECT_EQ( find_errors, 0u);
}
}
host_map_type hmap;
Kokkos::deep_copy(hmap, map);
ASSERT_EQ( map.size(), hmap.size());
ASSERT_EQ( map.capacity(), hmap.capacity());
{
uint32_t find_errors = 0;
Impl::TestFind<host_map_type> test_find(hmap, num_nodes, 1);
test_find.testit(find_errors);
EXPECT_EQ( find_errors, 0u);
}
map_type mmap;
Kokkos::deep_copy(mmap, hmap);
const_map_type cmap = mmap;
EXPECT_EQ( cmap.size(), num_nodes);
{
uint32_t find_errors = 0;
Impl::TestFind<const_map_type> test_find(cmap, num_nodes, 1);
test_find.testit(find_errors);
EXPECT_EQ( find_errors, 0u);
}
}
} // namespace Test
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP

View File

@ -1,131 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_VECTOR_HPP
#define KOKKOS_TEST_VECTOR_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
template <typename Scalar, class Device>
struct test_vector_combinations
{
typedef test_vector_combinations<Scalar,Device> self_type;
typedef Scalar scalar_type;
typedef Device execution_space;
Scalar reference;
Scalar result;
template <typename Vector>
Scalar run_me(unsigned int n){
Vector a(n,1);
a.push_back(2);
a.resize(n+4);
a[n+1] = 3;
a[n+2] = 4;
a[n+3] = 5;
Scalar temp1 = a[2];
Scalar temp2 = a[n];
Scalar temp3 = a[n+1];
a.assign(n+2,-1);
a[2] = temp1;
a[n] = temp2;
a[n+1] = temp3;
Scalar test1 = 0;
for(unsigned int i=0; i<a.size(); i++)
test1+=a[i];
a.assign(n+1,-2);
Scalar test2 = 0;
for(unsigned int i=0; i<a.size(); i++)
test2+=a[i];
a.reserve(n+10);
Scalar test3 = 0;
for(unsigned int i=0; i<a.size(); i++)
test3+=a[i];
return (test1*test2+test3)*test2+test1*test3;
}
test_vector_combinations(unsigned int size)
{
reference = run_me<std::vector<Scalar> >(size);
result = run_me<Kokkos::vector<Scalar,Device> >(size);
}
};
} // namespace Impl
template <typename Scalar, typename Device>
void test_vector_combinations(unsigned int size)
{
Impl::test_vector_combinations<Scalar,Device> test(size);
ASSERT_EQ( test.reference, test.result);
}
} // namespace Test
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP

View File

@ -1,50 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
int main(int argc, char *argv[]) {
::testing::InitGoogleTest(&argc,argv);
return RUN_ALL_TESTS();
}

View File

@ -1,66 +0,0 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/core/perf_test
default: build_all
echo "End Build"
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test
TEST_TARGETS =
TARGETS =
OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o
TARGETS += KokkosCore_PerformanceTest
TEST_TARGETS += test-performance
OBJ_ATOMICS = test_atomic.o
TARGETS += KokkosCore_PerformanceTest_Atomics
TEST_TARGETS += test-atomic
KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest
KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics
test-performance: KokkosCore_PerformanceTest
./KokkosCore_PerformanceTest
test-atomic: KokkosCore_PerformanceTest_Atomics
./KokkosCore_PerformanceTest_Atomics
build_all: $(TARGETS)
test: $(TEST_TARGETS)
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,309 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BLAS_KERNELS_HPP
#define KOKKOS_BLAS_KERNELS_HPP
namespace Kokkos {
template< class ConstVectorType ,
class Device = typename ConstVectorType::execution_space >
struct Dot ;
template< class ConstVectorType ,
class Device = typename ConstVectorType::execution_space >
struct DotSingle ;
template< class ConstScalarType ,
class VectorType ,
class Device = typename VectorType::execution_space >
struct Scale ;
template< class ConstScalarType ,
class ConstVectorType ,
class VectorType ,
class Device = typename VectorType::execution_space >
struct AXPBY ;
/** \brief Y = alpha * X + beta * Y */
template< class ConstScalarType ,
class ConstVectorType ,
class VectorType >
void axpby( const ConstScalarType & alpha ,
const ConstVectorType & X ,
const ConstScalarType & beta ,
const VectorType & Y )
{
typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;
parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
}
/** \brief Y *= alpha */
template< class ConstScalarType ,
class VectorType >
void scale( const ConstScalarType & alpha , const VectorType & Y )
{
typedef Scale< ConstScalarType , VectorType > functor ;
parallel_for( Y.dimension_0() , functor( alpha , Y ) );
}
template< class ConstVectorType ,
class Finalize >
void dot( const ConstVectorType & X ,
const ConstVectorType & Y ,
const Finalize & finalize )
{
typedef Dot< ConstVectorType > functor ;
parallel_reduce( X.dimension_0() , functor( X , Y ) , finalize );
}
template< class ConstVectorType ,
class Finalize >
void dot( const ConstVectorType & X ,
const Finalize & finalize )
{
typedef DotSingle< ConstVectorType > functor ;
parallel_reduce( X.dimension_0() , functor( X ) , finalize );
}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class Type , class Device >
struct Dot
{
typedef typename Device::execution_space execution_space ;
typedef typename
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
Impl::unsigned_< Type::Rank > >::type ok_rank ;
/* typedef typename
Impl::StaticAssertSame< execution_space ,
typename Type::execution_space >::type ok_device ;*/
typedef double value_type ;
#if 1
typename Type::const_type X ;
typename Type::const_type Y ;
#else
Type X ;
Type Y ;
#endif
Dot( const Type & arg_x , const Type & arg_y )
: X(arg_x) , Y(arg_y) { }
KOKKOS_INLINE_FUNCTION
void operator()( int i , value_type & update ) const
{ update += X[i] * Y[i]; }
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & source )
{ update += source; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
};
template< class Type , class Device >
struct DotSingle
{
typedef typename Device::execution_space execution_space ;
typedef typename
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
Impl::unsigned_< Type::Rank > >::type ok_rank ;
/* typedef typename
Impl::StaticAssertSame< execution_space ,
typename Type::execution_space >::type ok_device ;*/
typedef double value_type ;
#if 1
typename Type::const_type X ;
#else
Type X ;
#endif
DotSingle( const Type & arg_x ) : X(arg_x) {}
KOKKOS_INLINE_FUNCTION
void operator()( int i , value_type & update ) const
{
const typename Type::value_type & x = X[i]; update += x * x ;
}
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & source )
{ update += source; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
};
template< class ScalarType , class VectorType , class Device>
struct Scale
{
typedef typename Device::execution_space execution_space ;
/* typedef typename
Impl::StaticAssertSame< execution_space ,
typename ScalarType::execution_space >::type
ok_scalar_device ;
typedef typename
Impl::StaticAssertSame< execution_space ,
typename VectorType::execution_space >::type
ok_vector_device ;*/
typedef typename
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
Impl::unsigned_< ScalarType::Rank > >::type
ok_scalar_rank ;
typedef typename
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
Impl::unsigned_< VectorType::Rank > >::type
ok_vector_rank ;
#if 1
typename ScalarType::const_type alpha ;
#else
ScalarType alpha ;
#endif
VectorType Y ;
Scale( const ScalarType & arg_alpha , const VectorType & arg_Y )
: alpha( arg_alpha ), Y( arg_Y ) {}
KOKKOS_INLINE_FUNCTION
void operator()( int i ) const
{
Y[i] *= alpha() ;
}
};
template< class ScalarType ,
class ConstVectorType ,
class VectorType,
class Device>
struct AXPBY
{
typedef typename Device::execution_space execution_space ;
/* typedef typename
Impl::StaticAssertSame< execution_space ,
typename ScalarType::execution_space >::type
ok_scalar_device ;
typedef typename
Impl::StaticAssertSame< execution_space ,
typename ConstVectorType::execution_space >::type
ok_const_vector_device ;
typedef typename
Impl::StaticAssertSame< execution_space ,
typename VectorType::execution_space >::type
ok_vector_device ;*/
typedef typename
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
Impl::unsigned_< ScalarType::Rank > >::type
ok_scalar_rank ;
typedef typename
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
Impl::unsigned_< ConstVectorType::Rank > >::type
ok_const_vector_rank ;
typedef typename
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
Impl::unsigned_< VectorType::Rank > >::type
ok_vector_rank ;
#if 1
typename ScalarType::const_type alpha , beta ;
typename ConstVectorType::const_type X ;
#else
ScalarType alpha , beta ;
ConstVectorType X ;
#endif
VectorType Y ;
AXPBY( const ScalarType & arg_alpha ,
const ConstVectorType & arg_X ,
const ScalarType & arg_beta ,
const VectorType & arg_Y )
: alpha( arg_alpha ), beta( arg_beta ), X( arg_X ), Y( arg_Y ) {}
KOKKOS_INLINE_FUNCTION
void operator()( int i ) const
{
Y[i] = alpha() * X[i] + beta() * Y[i] ;
}
};
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_BLAS_KERNELS_HPP */

View File

@ -1,189 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <impl/Kokkos_Timer.hpp>
#include <PerfTestHexGrad.hpp>
#include <PerfTestBlasKernels.hpp>
#include <PerfTestGramSchmidt.hpp>
#include <PerfTestDriver.hpp>
namespace Test {
class cuda : public ::testing::Test {
protected:
static void SetUpTestCase() {
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase() {
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};
TEST_F( cuda, hexgrad )
{
EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}
TEST_F( cuda, gramschmidt )
{
EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}
namespace {
template <typename T>
struct TextureFetch
{
typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type;
typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type;
typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type;
typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type;
struct FillArray
{
array_type m_array;
FillArray( const array_type & array )
: m_array(array)
{}
void apply() const
{
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i) const { m_array(i) = i; }
};
struct RandomIndexes
{
index_array_type m_indexes;
typename index_array_type::HostMirror m_host_indexes;
RandomIndexes( const index_array_type & indexes)
: m_indexes(indexes)
, m_host_indexes(Kokkos::create_mirror(m_indexes))
{}
void apply() const
{
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.dimension_0()), *this);
//random shuffle
Kokkos::HostSpace::execution_space::fence();
std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.dimension_0());
Kokkos::deep_copy(m_indexes,m_host_indexes);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i) const { m_host_indexes(i) = i; }
};
struct RandomReduce
{
const_array_type m_array;
const_index_array_type m_indexes;
RandomReduce( const const_array_type & array, const const_index_array_type & indexes)
: m_array(array)
, m_indexes(indexes)
{}
void apply(T & reduce) const
{
Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this, reduce);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i, T & reduce) const
{ reduce += m_array(m_indexes(i)); }
};
static void run(int size, double & reduce_time, T &reduce)
{
array_type array("array",size);
index_array_type indexes("indexes",size);
{ FillArray f(array); f.apply(); }
{ RandomIndexes f(indexes); f.apply(); }
Kokkos::Cuda::fence();
Kokkos::Impl::Timer timer;
for (int j=0; j<10; ++j) {
RandomReduce f(array,indexes);
f.apply(reduce);
}
Kokkos::Cuda::fence();
reduce_time = timer.seconds();
}
};
} // unnamed namespace
TEST_F( cuda, texture_double )
{
printf("Random reduce of double through texture fetch\n");
for (int i=1; i<=27; ++i) {
int size = 1<<i;
double time = 0;
double reduce = 0;
TextureFetch<double>::run(size,time,reduce);
printf(" time = %1.3e size = 2^%d\n", time, i);
}
}
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */

View File

@ -1,152 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <string>
// mfh 06 Jun 2013: This macro doesn't work like one might thing it
// should. It doesn't take the template parameter DeviceType and
// print its actual type name; it just literally prints out
// "DeviceType". I've worked around this below without using the
// macro, so I'm commenting out the macro to avoid compiler complaints
// about an unused macro.
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
//------------------------------------------------------------------------
namespace Test {
enum { NUMBER_OF_TRIALS = 5 };
template< class DeviceType >
void run_test_hexgrad( int exp_beg , int exp_end, const char deviceTypeName[] )
{
std::string label_hexgrad ;
label_hexgrad.append( "\"HexGrad< double , " );
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
// the string, not the actual name of the device type. Thus, I've
// modified the function to take the name of the device type.
//
//label_hexgrad.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
label_hexgrad.append( deviceTypeName );
label_hexgrad.append( " >\"" );
for (int i = exp_beg ; i < exp_end ; ++i) {
double min_seconds = 0.0 ;
double max_seconds = 0.0 ;
double avg_seconds = 0.0 ;
const int parallel_work_length = 1<<i;
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ;
if ( 0 == j ) {
min_seconds = seconds ;
max_seconds = seconds ;
}
else {
if ( seconds < min_seconds ) min_seconds = seconds ;
if ( seconds > max_seconds ) max_seconds = seconds ;
}
avg_seconds += seconds ;
}
avg_seconds /= NUMBER_OF_TRIALS ;
std::cout << label_hexgrad
<< " , " << parallel_work_length
<< " , " << min_seconds
<< " , " << ( min_seconds / parallel_work_length )
<< std::endl ;
}
}
template< class DeviceType >
void run_test_gramschmidt( int exp_beg , int exp_end, const char deviceTypeName[] )
{
std::string label_gramschmidt ;
label_gramschmidt.append( "\"GramSchmidt< double , " );
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
// the string, not the actual name of the device type. Thus, I've
// modified the function to take the name of the device type.
//
//label_gramschmidt.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
label_gramschmidt.append( deviceTypeName );
label_gramschmidt.append( " >\"" );
for (int i = exp_beg ; i < exp_end ; ++i) {
double min_seconds = 0.0 ;
double max_seconds = 0.0 ;
double avg_seconds = 0.0 ;
const int parallel_work_length = 1<<i;
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ;
if ( 0 == j ) {
min_seconds = seconds ;
max_seconds = seconds ;
}
else {
if ( seconds < min_seconds ) min_seconds = seconds ;
if ( seconds > max_seconds ) max_seconds = seconds ;
}
avg_seconds += seconds ;
}
avg_seconds /= NUMBER_OF_TRIALS ;
std::cout << label_gramschmidt
<< " , " << parallel_work_length
<< " , " << min_seconds
<< " , " << ( min_seconds / parallel_work_length )
<< std::endl ;
}
}
}

View File

@ -1,226 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cmath>
#include <PerfTestBlasKernels.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Test {
// Reduction : result = dot( Q(:,j) , Q(:,j) );
// PostProcess : R(j,j) = result ; inv = 1 / result ;
template< class VectorView , class ValueView >
struct InvNorm2 : public Kokkos::DotSingle< VectorView > {
typedef typename Kokkos::DotSingle< VectorView >::value_type value_type ;
ValueView Rjj ;
ValueView inv ;
InvNorm2( const VectorView & argX ,
const ValueView & argR ,
const ValueView & argInv )
: Kokkos::DotSingle< VectorView >( argX )
, Rjj( argR )
, inv( argInv )
{}
KOKKOS_INLINE_FUNCTION
void final( value_type & result ) const
{
result = sqrt( result );
Rjj() = result ;
inv() = ( 0 < result ) ? 1.0 / result : 0 ;
}
};
template< class VectorView , class ValueView >
inline
void invnorm2( const VectorView & x ,
const ValueView & r ,
const ValueView & r_inv )
{
Kokkos::parallel_reduce( x.dimension_0() , InvNorm2< VectorView , ValueView >( x , r , r_inv ) );
}
// PostProcess : tmp = - ( R(j,k) = result );
template< class VectorView , class ValueView >
struct DotM : public Kokkos::Dot< VectorView > {
typedef typename Kokkos::Dot< VectorView >::value_type value_type ;
ValueView Rjk ;
ValueView tmp ;
DotM( const VectorView & argX ,
const VectorView & argY ,
const ValueView & argR ,
const ValueView & argTmp )
: Kokkos::Dot< VectorView >( argX , argY )
, Rjk( argR )
, tmp( argTmp )
{}
KOKKOS_INLINE_FUNCTION
void final( value_type & result ) const
{
Rjk() = result ;
tmp() = - result ;
}
};
template< class VectorView , class ValueView >
inline
void dot_neg( const VectorView & x ,
const VectorView & y ,
const ValueView & r ,
const ValueView & r_neg )
{
Kokkos::parallel_reduce( x.dimension_0() , DotM< VectorView , ValueView >( x , y , r , r_neg ) );
}
template< typename Scalar , class DeviceType >
struct ModifiedGramSchmidt
{
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
typedef Kokkos::View< Scalar** ,
Kokkos::LayoutLeft ,
execution_space > multivector_type ;
typedef Kokkos::View< Scalar* ,
Kokkos::LayoutLeft ,
execution_space > vector_type ;
typedef Kokkos::View< Scalar ,
Kokkos::LayoutLeft ,
execution_space > value_view ;
multivector_type Q ;
multivector_type R ;
static double factorization( const multivector_type Q_ ,
const multivector_type R_ )
{
const size_type count = Q_.dimension_1();
value_view tmp("tmp");
value_view one("one");
Kokkos::deep_copy( one , (Scalar) 1 );
Kokkos::Impl::Timer timer ;
for ( size_type j = 0 ; j < count ; ++j ) {
// Reduction : tmp = dot( Q(:,j) , Q(:,j) );
// PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ;
const vector_type Qj = Kokkos::subview( Q_ , Kokkos::ALL() , j );
const value_view Rjj = Kokkos::subview( R_ , j , j );
invnorm2( Qj , Rjj , tmp );
// Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ;
Kokkos::scale( tmp , Qj );
for ( size_t k = j + 1 ; k < count ; ++k ) {
const vector_type Qk = Kokkos::subview( Q_ , Kokkos::ALL() , k );
const value_view Rjk = Kokkos::subview( R_ , j , k );
// Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) );
// PostProcess : tmp = - R(j,k);
dot_neg( Qj , Qk , Rjk , tmp );
// Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j)
Kokkos::axpby( tmp , Qj , one , Qk );
}
}
execution_space::fence();
return timer.seconds();
}
//--------------------------------------------------------------------------
static double test( const size_t length ,
const size_t count ,
const size_t iter = 1 )
{
multivector_type Q_( "Q" , length , count );
multivector_type R_( "R" , count , count );
typename multivector_type::HostMirror A =
Kokkos::create_mirror( Q_ );
// Create and fill A on the host
for ( size_type j = 0 ; j < count ; ++j ) {
for ( size_type i = 0 ; i < length ; ++i ) {
A(i,j) = ( i + 1 ) * ( j + 1 );
}
}
double dt_min = 0 ;
for ( size_t i = 0 ; i < iter ; ++i ) {
Kokkos::deep_copy( Q_ , A );
// A = Q * R
const double dt = factorization( Q_ , R_ );
if ( 0 == i ) dt_min = dt ;
else dt_min = dt < dt_min ? dt : dt_min ;
}
return dt_min ;
}
};
}

View File

@ -1,268 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
namespace Test {
template< class DeviceType ,
typename CoordScalarType = double ,
typename GradScalarType = float >
struct HexGrad
{
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
typedef HexGrad<DeviceType,CoordScalarType,GradScalarType> self_type;
// 3D array : ( ParallelWork , Space , Node )
enum { NSpace = 3 , NNode = 8 };
typedef Kokkos::View< CoordScalarType*[NSpace][NNode] , execution_space >
elem_coord_type ;
typedef Kokkos::View< GradScalarType*[NSpace][NNode] , execution_space >
elem_grad_type ;
elem_coord_type coords ;
elem_grad_type grad_op ;
enum { FLOPS = 318 }; // = 3 * ( 18 + 8 * 11 ) };
enum { READS = 18 };
enum { WRITES = 18 };
HexGrad( const elem_coord_type & arg_coords ,
const elem_grad_type & arg_grad_op )
: coords( arg_coords )
, grad_op( arg_grad_op )
{}
KOKKOS_INLINE_FUNCTION static
void grad( const CoordScalarType x[] ,
const CoordScalarType z[] ,
GradScalarType grad_y[] )
{
const GradScalarType R42=(x[3] - x[1]);
const GradScalarType R52=(x[4] - x[1]);
const GradScalarType R54=(x[4] - x[3]);
const GradScalarType R63=(x[5] - x[2]);
const GradScalarType R83=(x[7] - x[2]);
const GradScalarType R86=(x[7] - x[5]);
const GradScalarType R31=(x[2] - x[0]);
const GradScalarType R61=(x[5] - x[0]);
const GradScalarType R74=(x[6] - x[3]);
const GradScalarType R72=(x[6] - x[1]);
const GradScalarType R75=(x[6] - x[4]);
const GradScalarType R81=(x[7] - x[0]);
const GradScalarType t1=(R63 + R54);
const GradScalarType t2=(R61 + R74);
const GradScalarType t3=(R72 + R81);
const GradScalarType t4 =(R86 + R42);
const GradScalarType t5 =(R83 + R52);
const GradScalarType t6 =(R75 + R31);
// Calculate Y gradient from X and Z data
grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
}
KOKKOS_INLINE_FUNCTION
void operator()( size_type ielem ) const
{
GradScalarType g[NNode] ;
const CoordScalarType x[NNode] = {
coords(ielem,0,0),
coords(ielem,0,1),
coords(ielem,0,2),
coords(ielem,0,3),
coords(ielem,0,4),
coords(ielem,0,5),
coords(ielem,0,6),
coords(ielem,0,7)
};
const CoordScalarType y[NNode] = {
coords(ielem,1,0),
coords(ielem,1,1),
coords(ielem,1,2),
coords(ielem,1,3),
coords(ielem,1,4),
coords(ielem,1,5),
coords(ielem,1,6),
coords(ielem,1,7)
};
const CoordScalarType z[NNode] = {
coords(ielem,2,0),
coords(ielem,2,1),
coords(ielem,2,2),
coords(ielem,2,3),
coords(ielem,2,4),
coords(ielem,2,5),
coords(ielem,2,6),
coords(ielem,2,7)
};
grad( z , y , g );
grad_op(ielem,0,0) = g[0];
grad_op(ielem,0,1) = g[1];
grad_op(ielem,0,2) = g[2];
grad_op(ielem,0,3) = g[3];
grad_op(ielem,0,4) = g[4];
grad_op(ielem,0,5) = g[5];
grad_op(ielem,0,6) = g[6];
grad_op(ielem,0,7) = g[7];
grad( x , z , g );
grad_op(ielem,1,0) = g[0];
grad_op(ielem,1,1) = g[1];
grad_op(ielem,1,2) = g[2];
grad_op(ielem,1,3) = g[3];
grad_op(ielem,1,4) = g[4];
grad_op(ielem,1,5) = g[5];
grad_op(ielem,1,6) = g[6];
grad_op(ielem,1,7) = g[7];
grad( y , x , g );
grad_op(ielem,2,0) = g[0];
grad_op(ielem,2,1) = g[1];
grad_op(ielem,2,2) = g[2];
grad_op(ielem,2,3) = g[3];
grad_op(ielem,2,4) = g[4];
grad_op(ielem,2,5) = g[5];
grad_op(ielem,2,6) = g[6];
grad_op(ielem,2,7) = g[7];
}
//--------------------------------------------------------------------------
struct Init {
typedef typename self_type::execution_space execution_space ;
elem_coord_type coords ;
Init( const elem_coord_type & arg_coords )
: coords( arg_coords ) {}
KOKKOS_INLINE_FUNCTION
void operator()( size_type ielem ) const
{
coords(ielem,0,0) = 0.;
coords(ielem,1,0) = 0.;
coords(ielem,2,0) = 0.;
coords(ielem,0,1) = 1.;
coords(ielem,1,1) = 0.;
coords(ielem,2,1) = 0.;
coords(ielem,0,2) = 1.;
coords(ielem,1,2) = 1.;
coords(ielem,2,2) = 0.;
coords(ielem,0,3) = 0.;
coords(ielem,1,3) = 1.;
coords(ielem,2,3) = 0.;
coords(ielem,0,4) = 0.;
coords(ielem,1,4) = 0.;
coords(ielem,2,4) = 1.;
coords(ielem,0,5) = 1.;
coords(ielem,1,5) = 0.;
coords(ielem,2,5) = 1.;
coords(ielem,0,6) = 1.;
coords(ielem,1,6) = 1.;
coords(ielem,2,6) = 1.;
coords(ielem,0,7) = 0.;
coords(ielem,1,7) = 1.;
coords(ielem,2,7) = 1.;
}
};
//--------------------------------------------------------------------------
static double test( const int count , const int iter = 1 )
{
elem_coord_type coord( "coord" , count );
elem_grad_type grad ( "grad" , count );
// Execute the parallel kernels on the arrays:
double dt_min = 0 ;
Kokkos::parallel_for( count , Init( coord ) );
execution_space::fence();
for ( int i = 0 ; i < iter ; ++i ) {
Kokkos::Impl::Timer timer ;
Kokkos::parallel_for( count , HexGrad<execution_space>( coord , grad ) );
execution_space::fence();
const double dt = timer.seconds();
if ( 0 == i ) dt_min = dt ;
else dt_min = dt < dt_min ? dt : dt_min ;
}
return dt_min ;
}
};
}

View File

@ -1,104 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_OPENMP )
typedef Kokkos::OpenMP TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::OpenMP" ;
#elif defined( KOKKOS_HAVE_PTHREAD )
typedef Kokkos::Threads TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Threads" ;
#elif defined( KOKKOS_HAVE_SERIAL )
typedef Kokkos::Serial TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Serial" ;
#else
# error "You must enable at least one of the following execution spaces in order to build this test: Kokkos::Threads, Kokkos::OpenMP, or Kokkos::Serial."
#endif
#include <impl/Kokkos_Timer.hpp>
#include <PerfTestHexGrad.hpp>
#include <PerfTestBlasKernels.hpp>
#include <PerfTestGramSchmidt.hpp>
#include <PerfTestDriver.hpp>
//------------------------------------------------------------------------
namespace Test {
class host : public ::testing::Test {
protected:
static void SetUpTestCase()
{
const unsigned team_count = Kokkos::hwloc::get_available_numa_count();
const unsigned threads_per_team = 4 ;
TestHostDevice::initialize( team_count * threads_per_team );
}
static void TearDownTestCase()
{
TestHostDevice::finalize();
}
};
TEST_F( host, hexgrad ) {
EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName ));
}
TEST_F( host, gramschmidt ) {
EXPECT_NO_THROW(run_test_gramschmidt< TestHostDevice>( 10, 20, TestHostDeviceName ));
}
} // namespace Test

View File

@ -1,49 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
int main(int argc, char *argv[]) {
::testing::InitGoogleTest(&argc,argv);
return RUN_ALL_TESTS();
}

View File

@ -1,504 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
typedef Kokkos::DefaultExecutionSpace exec_space;
#define RESET 0
#define BRIGHT 1
#define DIM 2
#define UNDERLINE 3
#define BLINK 4
#define REVERSE 7
#define HIDDEN 8
#define BLACK 0
#define RED 1
#define GREEN 2
#define YELLOW 3
#define BLUE 4
#define MAGENTA 5
#define CYAN 6
#define GREY 7
#define WHITE 8
void textcolor(int attr, int fg, int bg)
{ char command[13];
/* Command is the control command to the terminal */
sprintf(command, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
printf("%s", command);
}
void textcolor_standard() {textcolor(RESET, BLACK, WHITE);}
template<class T,class DEVICE_TYPE>
struct ZeroFunctor{
typedef DEVICE_TYPE execution_space;
typedef typename Kokkos::View<T,execution_space> type;
typedef typename Kokkos::View<T,execution_space>::HostMirror h_type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
data() = 0;
}
};
//---------------------------------------------------
//--------------atomic_fetch_add---------------------
//---------------------------------------------------
template<class T,class DEVICE_TYPE>
struct AddFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
Kokkos::atomic_fetch_add(&data(),(T)1);
}
};
template<class T>
T AddLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct AddFunctor<T,exec_space> f_add;
f_add.data = data;
Kokkos::parallel_for(loop,f_add);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T,class DEVICE_TYPE>
struct AddNonAtomicFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
data()+=(T)1;
}
};
template<class T>
T AddLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct AddNonAtomicFunctor<T,exec_space> f_add;
f_add.data = data;
Kokkos::parallel_for(loop,f_add);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T>
T AddLoopSerial(int loop) {
T* data = new T[1];
data[0] = 0;
for(int i=0;i<loop;i++)
*data+=(T)1;
T val = *data;
delete data;
return val;
}
template<class T,class DEVICE_TYPE>
struct CASFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
T old = data();
T newval, assumed;
do {
assumed = old;
newval = assumed + (T)1;
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
}
while( old != assumed );
}
};
template<class T>
T CASLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct CASFunctor<T,exec_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop,f_cas);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T,class DEVICE_TYPE>
struct CASNonAtomicFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
volatile T assumed;
volatile T newval;
bool fail=1;
do {
assumed = data();
newval = assumed + (T)1;
if(data()==assumed) {
data() = newval;
fail = 0;
}
}
while(fail);
}
};
template<class T>
T CASLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct CASNonAtomicFunctor<T,exec_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop,f_cas);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
template<class T>
T CASLoopSerial(int loop) {
T* data = new T[1];
data[0] = 0;
for(int i=0;i<loop;i++) {
T assumed;
T newval;
T old;
do {
assumed = *data;
newval = assumed + (T)1;
old = *data;
*data = newval;
}
while(!(assumed==old));
}
T val = *data;
delete data;
return val;
}
template<class T,class DEVICE_TYPE>
struct ExchFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data, data2;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
T old = Kokkos::atomic_exchange(&data(),(T)i);
Kokkos::atomic_fetch_add(&data2(),old);
}
};
template<class T>
T ExchLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
typename ZeroFunctor<T,exec_space>::type data2("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
f_zero.data = data2;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct ExchFunctor<T,exec_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for(loop,f_exch);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
Kokkos::deep_copy(h_data2,data2);
T val = h_data() + h_data2();
return val;
}
template<class T,class DEVICE_TYPE>
struct ExchNonAtomicFunctor{
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View<T,execution_space> type;
type data, data2;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
T old = data();
data()=(T) i;
data2()+=old;
}
};
template<class T>
T ExchLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
typename ZeroFunctor<T,exec_space>::type data2("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
f_zero.data = data2;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct ExchNonAtomicFunctor<T,exec_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for(loop,f_exch);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
Kokkos::deep_copy(h_data2,data2);
T val = h_data() + h_data2();
return val;
}
template<class T>
T ExchLoopSerial(int loop) {
T* data = new T[1];
T* data2 = new T[1];
data[0] = 0;
data2[0] = 0;
for(int i=0;i<loop;i++) {
T old = *data;
*data=(T) i;
*data2+=old;
}
T val = *data2 + *data;
delete data;
delete data2;
return val;
}
template<class T>
T LoopVariant(int loop, int test) {
switch (test) {
case 1: return AddLoop<T>(loop);
case 2: return CASLoop<T>(loop);
case 3: return ExchLoop<T>(loop);
}
return 0;
}
template<class T>
T LoopVariantSerial(int loop, int test) {
switch (test) {
case 1: return AddLoopSerial<T>(loop);
case 2: return CASLoopSerial<T>(loop);
case 3: return ExchLoopSerial<T>(loop);
}
return 0;
}
template<class T>
T LoopVariantNonAtomic(int loop, int test) {
switch (test) {
case 1: return AddLoopNonAtomic<T>(loop);
case 2: return CASLoopNonAtomic<T>(loop);
case 3: return ExchLoopNonAtomic<T>(loop);
}
return 0;
}
template<class T>
void Loop(int loop, int test, const char* type_name) {
LoopVariant<T>(loop,test);
Kokkos::Impl::Timer timer;
T res = LoopVariant<T>(loop,test);
double time1 = timer.seconds();
timer.reset();
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
double time2 = timer.seconds();
timer.reset();
T resSerial = LoopVariantSerial<T>(loop,test);
double time3 = timer.seconds();
time1*=1e6/loop;
time2*=1e6/loop;
time3*=1e6/loop;
//textcolor_standard();
bool passed = true;
if(resSerial!=res) passed = false;
//if(!passed) textcolor(RESET,BLACK,YELLOW);
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
//if(!passed) textcolor_standard();
printf("\n");
}
template<class T>
void Test(int loop, int test, const char* type_name) {
if(test==-1) {
Loop<T>(loop,1,type_name);
Loop<T>(loop,2,type_name);
Loop<T>(loop,3,type_name);
}
else
Loop<T>(loop,test,type_name);
}
int main(int argc, char* argv[])
{
int type = -1;
int loop = 1000000;
int test = -1;
for(int i=0;i<argc;i++)
{
if((strcmp(argv[i],"--test")==0)) {test=atoi(argv[++i]); continue;}
if((strcmp(argv[i],"--type")==0)) {type=atoi(argv[++i]); continue;}
if((strcmp(argv[i],"-l")==0)||(strcmp(argv[i],"--loop")==0)) {loop=atoi(argv[++i]); continue;}
}
Kokkos::initialize(argc,argv);
printf("Using %s\n",Kokkos::atomic_query_version());
bool all_tests = false;
if(type==-1) all_tests = true;
while(type<100) {
if(type==1) {
Test<int>(loop,test,"int ");
}
if(type==2) {
Test<long int>(loop,test,"long int ");
}
if(type==3) {
Test<long long int>(loop,test,"long long int ");
}
if(type==4) {
Test<unsigned int>(loop,test,"unsigned int ");
}
if(type==5) {
Test<unsigned long int>(loop,test,"unsigned long int ");
}
if(type==6) {
Test<unsigned long long int>(loop,test,"unsigned long long int ");
}
if(type==10) {
//Test<float>(loop,test,"float ");
}
if(type==11) {
Test<double>(loop,test,"double ");
}
if(!all_tests) type=100;
else type++;
}
Kokkos::finalize();
}

View File

@ -1,283 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( KOKKOS_HAVE_CUDA )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
//----------------------------------------------------------------------------
// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
// Via reinterpret_case this can be used to support all scalar types of those sizes.
// Any other scalar type falls back to either normal reads out of global memory,
// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
template< typename ValueType , typename AliasType >
struct CudaTextureFetch {
::cudaTextureObject_t m_obj ;
const ValueType * m_ptr ;
int m_offset ;
// Deference operator pulls through texture object and returns by value
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
return *(reinterpret_cast<ValueType*> (&v));
#else
return m_ptr[ i ];
#endif
}
// Pointer to referenced memory
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_ptr ; }
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {}
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs )
: m_obj( rhs.m_obj )
, m_ptr( rhs.m_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( CudaTextureFetch && rhs )
: m_obj( rhs.m_obj )
, m_ptr( rhs.m_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
{
m_obj = rhs.m_obj ;
m_ptr = rhs.m_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( CudaTextureFetch && rhs )
{
m_obj = rhs.m_obj ;
m_ptr = rhs.m_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
// Texture object spans the entire allocation.
// This handle may view a subset of the allocation, so an offset is required.
template< class CudaMemorySpace >
inline explicit
CudaTextureFetch( const ValueType * const arg_ptr
, Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record
)
// 'attach_texture_object' returns 0 when __CUDA_ARCH__ < 300
: m_obj( record.template attach_texture_object< AliasType >() )
, m_ptr( arg_ptr )
, m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) )
{}
};
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
template< typename ValueType , typename AliasType >
struct CudaLDGFetch {
const ValueType * m_ptr ;
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_ptr[i]));
return *(reinterpret_cast<ValueType*> (&v));
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_ptr ; }
KOKKOS_INLINE_FUNCTION
CudaLDGFetch() : m_ptr() {}
KOKKOS_INLINE_FUNCTION
~CudaLDGFetch() {}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch( const CudaLDGFetch & rhs )
: m_ptr( rhs.m_ptr )
{}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch( CudaLDGFetch && rhs )
: m_ptr( rhs.m_ptr )
{}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch & operator = ( const CudaLDGFetch & rhs )
{
m_ptr = rhs.m_ptr ;
return *this ;
}
KOKKOS_INLINE_FUNCTION
CudaLDGFetch & operator = ( CudaLDGFetch && rhs )
{
m_ptr = rhs.m_ptr ;
return *this ;
}
template< class CudaMemorySpace >
inline explicit
CudaTextureFetch( const ValueType * const arg_ptr
, Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const &
)
: m_ptr( arg_data_ptr )
{}
};
#endif
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
* if 'const' value type, CudaSpace and random access.
*/
template< class Traits >
class ViewDataHandle< Traits ,
typename std::enable_if<(
// Is Cuda memory space
( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )
&&
// Is a trivial const value of 4, 8, or 16 bytes
std::is_trivial<typename Traits::const_value_type>::value
&&
std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value
&&
( sizeof(typename Traits::const_value_type) == 4 ||
sizeof(typename Traits::const_value_type) == 8 ||
sizeof(typename Traits::const_value_type) == 16 )
&&
// Random access trait
( Traits::memory_traits::RandomAccess != 0 )
)>::type >
{
public:
using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ;
using value_type = typename Traits::const_value_type ;
using return_type = typename Traits::const_value_type ; // NOT a reference
using alias_type = typename std::conditional< ( sizeof(value_type) == 4 ) , int ,
typename std::conditional< ( sizeof(value_type) == 8 ) , ::int2 ,
typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void
>::type
>::type
>::type ;
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
#else
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
#endif
KOKKOS_INLINE_FUNCTION
static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ )
{
return arg_handle ;
}
KOKKOS_INLINE_FUNCTION
static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker )
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Assignment of texture = non-texture requires creation of a texture object
// which can only occur on the host. In addition, 'get_record' is only valid
// if called in a host execution space
return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() );
#else
Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel");
return handle_type();
#endif
}
};
}
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */

View File

@ -1,277 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDAEXEC_HPP
#define KOKKOS_CUDAEXEC_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <string>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
#include <Cuda/Kokkos_Cuda_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
struct CudaTraits {
enum { WarpSize = 32 /* 0x0020 */ };
enum { WarpIndexMask = 0x001f /* Mask for warpindex */ };
enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ };
enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ };
enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ };
enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ };
enum { UpperBoundGridCount = 65535 /* Hard upper bound */ };
enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ };
enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ };
enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ };
typedef unsigned long
ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ];
enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ };
KOKKOS_INLINE_FUNCTION static
CudaSpace::size_type warp_count( CudaSpace::size_type i )
{ return ( i + WarpIndexMask ) >> WarpIndexShift ; }
KOKKOS_INLINE_FUNCTION static
CudaSpace::size_type warp_align( CudaSpace::size_type i )
{
enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) };
return ( i + WarpIndexMask ) & Mask ;
}
};
//----------------------------------------------------------------------------
CudaSpace::size_type cuda_internal_maximum_warp_count();
CudaSpace::size_type cuda_internal_maximum_grid_count();
CudaSpace::size_type cuda_internal_maximum_shared_words();
CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size );
CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size );
CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size );
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( __CUDACC__ )
/** \brief Access to constant memory on the device */
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ __constant__
Kokkos::Impl::CudaTraits::ConstantGlobalBufferType
kokkos_impl_cuda_constant_memory_buffer ;
__device__ __constant__
int* kokkos_impl_cuda_atomic_lock_array ;
#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
#define CUDA_SPACE_ATOMIC_XOR_MASK 0x15A39
namespace Kokkos {
namespace Impl {
__device__ inline
bool lock_address_cuda_space(void* ptr) {
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & CUDA_SPACE_ATOMIC_MASK;
//offset = offset xor CUDA_SPACE_ATOMIC_XOR_MASK;
return (0 == atomicCAS(&kokkos_impl_cuda_atomic_lock_array[offset],0,1));
}
__device__ inline
void unlock_address_cuda_space(void* ptr) {
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & CUDA_SPACE_ATOMIC_MASK;
//offset = offset xor CUDA_SPACE_ATOMIC_XOR_MASK;
atomicExch( &kokkos_impl_cuda_atomic_lock_array[ offset ], 0);
}
}
}
template< typename T >
inline
__device__
T * kokkos_impl_cuda_shared_memory()
{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; }
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// See section B.17 of Cuda C Programming Guide Version 3.2
// for discussion of
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
// function qualifier which could be used to improve performance.
//----------------------------------------------------------------------------
// Maximize L1 cache and minimize shared memory:
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 );
// For 2.0 capability: 48 KB L1 and 16 KB shared
//----------------------------------------------------------------------------
template< class DriverType >
__global__
static void cuda_parallel_launch_constant_memory()
{
const DriverType & driver =
*((const DriverType *) kokkos_impl_cuda_constant_memory_buffer );
driver();
}
template< class DriverType >
__global__
static void cuda_parallel_launch_local_memory( const DriverType driver )
{
driver();
}
template < class DriverType ,
bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) >
struct CudaParallelLaunch ;
template < class DriverType >
struct CudaParallelLaunch< DriverType , true > {
inline
CudaParallelLaunch( const DriverType & driver
, const dim3 & grid
, const dim3 & block
, const int shmem
, const cudaStream_t stream = 0 )
{
if ( grid.x && ( block.x * block.y * block.z ) ) {
if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
sizeof( DriverType ) ) {
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") );
}
if ( CudaTraits::SharedMemoryCapacity < shmem ) {
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
}
else if ( shmem ) {
cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared );
} else {
cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 );
}
// Copy functor to constant memory on the device
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
int* lock_array_ptr = lock_array_cuda_space_ptr();
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
// Invoke the driver function on the device
cuda_parallel_launch_constant_memory< DriverType ><<< grid , block , shmem , stream >>>();
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
Kokkos::Cuda::fence();
CUDA_SAFE_CALL( cudaGetLastError() );
#endif
}
}
};
template < class DriverType >
struct CudaParallelLaunch< DriverType , false > {
inline
CudaParallelLaunch( const DriverType & driver
, const dim3 & grid
, const dim3 & block
, const int shmem
, const cudaStream_t stream = 0 )
{
if ( grid.x && ( block.x * block.y * block.z ) ) {
if ( CudaTraits::SharedMemoryCapacity < shmem ) {
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
}
else if ( shmem ) {
cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferShared );
} else {
cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferL1 );
}
int* lock_array_ptr = lock_array_cuda_space_ptr();
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
cuda_parallel_launch_local_memory< DriverType ><<< grid , block , shmem , stream >>>( driver );
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
Kokkos::Cuda::fence();
CUDA_SAFE_CALL( cudaGetLastError() );
#endif
}
}
};
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* defined( __CUDACC__ ) */
#endif /* defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */

View File

@ -1,686 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Kokkos_Cuda.hpp>
#include <Kokkos_CudaSpace.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace {
cudaStream_t get_deep_copy_stream() {
static cudaStream_t s = 0;
if( s == 0) {
cudaStreamCreate ( &s );
}
return s;
}
}
DeepCopy<CudaSpace,CudaSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<HostSpace,CudaSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<CudaSpace,HostSpace,Cuda>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<CudaSpace,CudaSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
DeepCopy<HostSpace,CudaSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
DeepCopy<CudaSpace,HostSpace,Cuda>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
void DeepCopyAsyncCuda( void * dst , const void * src , size_t n) {
cudaStream_t s = get_deep_copy_stream();
CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , s ) );
cudaStreamSynchronize(s);
}
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace {
void texture_object_attach_impl( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
enum { TEXTURE_BOUND_1D = 2u << 27 };
if ( tracker.attribute() == NULL ) {
// check for correct allocator
const bool ok_alloc = tracker.allocator()->support_texture_binding();
const bool ok_count = (tracker.alloc_size() / type_size) < TEXTURE_BOUND_1D;
if (ok_alloc && ok_count) {
Impl::TextureAttribute * attr = new Impl::TextureAttribute( tracker.alloc_ptr(), tracker.alloc_size(), desc );
tracker.set_attribute( attr );
}
else {
std::ostringstream oss;
oss << "Error: Cannot attach texture object";
if (!ok_alloc) {
oss << ", incompatabile allocator " << tracker.allocator()->name();
}
if (!ok_count) {
oss << ", array " << tracker.label() << " too large";
}
oss << ".";
Kokkos::Impl::throw_runtime_exception( oss.str() );
}
}
if ( NULL == dynamic_cast<Impl::TextureAttribute *>(tracker.attribute()) ) {
std::ostringstream oss;
oss << "Error: Allocation " << tracker.label() << " already has an attribute attached.";
Kokkos::Impl::throw_runtime_exception( oss.str() );
}
}
} // unnamed namespace
/*--------------------------------------------------------------------------*/
Impl::AllocationTracker CudaSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
void CudaSpace::texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
texture_object_attach_impl( tracker, type_size, desc );
}
void CudaSpace::access_error()
{
const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
Kokkos::Impl::throw_runtime_exception( msg );
}
void CudaSpace::access_error( const void * const )
{
const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
Kokkos::Impl::throw_runtime_exception( msg );
}
/*--------------------------------------------------------------------------*/
Impl::AllocationTracker CudaUVMSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
void CudaUVMSpace::texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
texture_object_attach_impl( tracker, type_size, desc );
}
bool CudaUVMSpace::available()
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && !defined(__APPLE__)
enum { UVM_available = true };
#else
enum { UVM_available = false };
#endif
return UVM_available;
}
/*--------------------------------------------------------------------------*/
Impl::AllocationTracker CudaHostPinnedSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
CudaSpace::CudaSpace()
: m_device( Kokkos::Cuda().cuda_device() )
{
}
CudaUVMSpace::CudaUVMSpace()
: m_device( Kokkos::Cuda().cuda_device() )
{
}
CudaHostPinnedSpace::CudaHostPinnedSpace()
{
}
void * CudaSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMalloc( &ptr, arg_alloc_size ) );
return ptr ;
}
void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );
return ptr ;
}
void * CudaHostPinnedSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaHostAlloc( &ptr, arg_alloc_size , cudaHostAllocDefault ) );
return ptr ;
}
void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
} catch(...) {}
}
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
} catch(...) {}
}
void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFreeHost( arg_alloc_ptr ) );
} catch(...) {}
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record ;
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record ;
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record ;
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
, void * const alloc_ptr
, size_t const alloc_size )
{
// Only valid for 300 <= __CUDA_ARCH__
// otherwise return zero.
::cudaTextureObject_t tex_obj ;
struct cudaResourceDesc resDesc ;
struct cudaTextureDesc texDesc ;
memset( & resDesc , 0 , sizeof(resDesc) );
memset( & texDesc , 0 , sizeof(texDesc) );
resDesc.resType = cudaResourceTypeLinear ;
resDesc.res.linear.desc = ( sizeof_alias == 4 ? cudaCreateChannelDesc< int >() :
( sizeof_alias == 8 ? cudaCreateChannelDesc< ::int2 >() :
/* sizeof_alias == 16 */ cudaCreateChannelDesc< ::int4 >() ) );
resDesc.res.linear.sizeInBytes = alloc_size ;
resDesc.res.linear.devPtr = alloc_ptr ;
CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );
return tex_obj ;
}
std::string
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_label() const
{
SharedAllocationHeader header ;
Kokkos::Impl::DeepCopy< Kokkos::HostSpace , Kokkos::CudaSpace >( & header , RecordBase::head() , sizeof(SharedAllocationHeader) );
return std::string( header.m_label );
}
std::string
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_label() const
{
return std::string( RecordBase::head()->m_label );
}
std::string
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_label() const
{
return std::string( RecordBase::head()->m_label );
}
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::
allocate( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
allocate( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
allocate( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
void
SharedAllocationRecord< Kokkos::CudaSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
void
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
void
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
SharedAllocationRecord< Kokkos::CudaSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaSpace , void >::
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
( & SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_tex_obj( 0 )
, m_space( arg_space )
{
SharedAllocationHeader header ;
// Fill in the Header information
header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this );
strncpy( header.m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
// Copy to device memory
Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
( & SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_tex_obj( 0 )
, m_space( arg_space )
{
// Fill in the Header information, directly accessible via UVM
RecordBase::m_alloc_ptr->m_record = this ;
strncpy( RecordBase::m_alloc_ptr->m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const SharedAllocationRecord< void , void >::function_type arg_dealloc
)
// Pass through allocated [ SharedAllocationHeader , user_memory ]
// Pass through deallocation function
: SharedAllocationRecord< void , void >
( & SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record
, reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
, sizeof(SharedAllocationHeader) + arg_alloc_size
, arg_dealloc
)
, m_space( arg_space )
{
// Fill in the Header information, directly accessible via UVM
RecordBase::m_alloc_ptr->m_record = this ;
strncpy( RecordBase::m_alloc_ptr->m_label
, arg_label.c_str()
, SharedAllocationHeader::maximum_label_length
);
}
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr )
{
using Header = SharedAllocationHeader ;
using RecordBase = SharedAllocationRecord< void , void > ;
using RecordCuda = SharedAllocationRecord< Kokkos::CudaSpace , void > ;
#if 0
// Copy the header from the allocation
SharedAllocationHeader head ;
SharedAllocationHeader const * const head_cuda = Header::get_header( alloc_ptr );
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );
RecordCuda * const record = static_cast< RecordCuda * >( head.m_record );
if ( record->m_alloc_ptr != head_cuda ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
}
#else
// Iterate the list to search for the record among all allocations
// requires obtaining the root of the list and then locking the list.
RecordCuda * const record = static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) );
if ( record == 0 ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
}
#endif
return record ;
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_ptr )
{
using Header = SharedAllocationHeader ;
using RecordCuda = SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
Header * const h = reinterpret_cast< Header * >( alloc_ptr ) - 1 ;
if ( h->m_record->m_alloc_ptr != h ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) );
}
return static_cast< RecordCuda * >( h->m_record );
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void * alloc_ptr )
{
using Header = SharedAllocationHeader ;
using RecordCuda = SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > ;
Header * const h = reinterpret_cast< Header * >( alloc_ptr ) - 1 ;
if ( h->m_record->m_alloc_ptr != h ) {
Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
}
return static_cast< RecordCuda * >( h->m_record );
}
// Iterate records to print orphaned memory ...
void
SharedAllocationRecord< Kokkos::CudaSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail )
{
SharedAllocationRecord< void , void > * r = & s_root_record ;
char buffer[256] ;
SharedAllocationHeader head ;
if ( detail ) {
do {
if ( r->m_alloc_ptr ) {
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
}
else {
head.m_label[0] = 0 ;
}
snprintf( buffer , 256 , "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
, reinterpret_cast<unsigned long>( r )
, reinterpret_cast<unsigned long>( r->m_prev )
, reinterpret_cast<unsigned long>( r->m_next )
, reinterpret_cast<unsigned long>( r->m_alloc_ptr )
, r->m_alloc_size
, r->m_count
, reinterpret_cast<unsigned long>( r->m_dealloc )
, head.m_label
);
std::cout << buffer ;
r = r->m_next ;
} while ( r != & s_root_record );
}
else {
do {
if ( r->m_alloc_ptr ) {
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
snprintf( buffer , 256 , "Cuda [ 0x%.12lx + %ld ] %s\n"
, reinterpret_cast< unsigned long >( r->data() )
, r->size()
, head.m_label
);
}
else {
snprintf( buffer , 256 , "Cuda [ 0 + 0 ]\n" );
}
std::cout << buffer ;
r = r->m_next ;
} while ( r != & s_root_record );
}
}
void
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaUVMSpace & space , bool detail )
{
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaUVM" , & s_root_record , detail );
}
void
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bool detail )
{
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaHostPinned" , & s_root_record , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace {
__global__ void init_lock_array_kernel() {
unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
if(i<CUDA_SPACE_ATOMIC_MASK+1)
kokkos_impl_cuda_atomic_lock_array[i] = 0;
}
}
namespace Impl {
int* lock_array_cuda_space_ptr(bool deallocate) {
static int* ptr = NULL;
if(deallocate) {
cudaFree(ptr);
ptr = NULL;
}
if(ptr==NULL && !deallocate)
cudaMalloc(&ptr,sizeof(int)*(CUDA_SPACE_ATOMIC_MASK+1));
return ptr;
}
void init_lock_array_cuda_space() {
int is_initialized = 0;
if(! is_initialized) {
int* lock_array_ptr = lock_array_cuda_space_ptr();
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
init_lock_array_kernel<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>();
}
}
}
}
#endif // KOKKOS_HAVE_CUDA

View File

@ -1,183 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
#define KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
namespace Kokkos {
namespace Impl {
template< class DestructFunctor >
SharedAllocationRecord *
shared_allocation_record( Kokkos::CudaSpace const & arg_space
, void * const arg_alloc_ptr
, DestructFunctor const & arg_destruct )
{
SharedAllocationRecord * const record = SharedAllocationRecord::get_record( arg_alloc_ptr );
// assert: record != 0
// assert: sizeof(DestructFunctor) <= record->m_destruct_size
// assert: record->m_destruct_function == 0
DestructFunctor * const functor =
reinterpret_cast< DestructFunctor * >(
reinterpret_cast< unsigned long >( record ) + sizeof(SharedAllocationRecord) );
new( functor ) DestructFunctor( arg_destruct );
record->m_destruct_functor = & shared_allocation_destroy< DestructFunctor > ;
return record ;
}
/// class CudaUnmanagedAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedAllocator
{
static const char * name()
{
return "Cuda Unmanaged Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedUVMAllocator
{
static const char * name()
{
return "Cuda Unmanaged UVM Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedHostAllocator
/// does nothing when deallocate(ptr,size) is called
class CudaUnmanagedHostAllocator
{
public:
static const char * name()
{
return "Cuda Unmanaged Host Allocator";
}
// Unmanaged deallocate does nothing
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
};
/// class CudaMallocAllocator
class CudaMallocAllocator
{
public:
static const char * name()
{
return "Cuda Malloc Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaUVMAllocator
class CudaUVMAllocator
{
public:
static const char * name()
{
return "Cuda UVM Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaHostAllocator
class CudaHostAllocator
{
public:
static const char * name()
{
return "Cuda Host Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP

View File

@ -1,192 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <sstream>
namespace Kokkos { namespace Impl {
/*--------------------------------------------------------------------------*/
TextureAttribute::TextureAttribute( void * const alloc_ptr
, size_t alloc_size
, cudaChannelFormatDesc const & desc
)
: m_tex_obj(0)
{
cuda_device_synchronize();
struct cudaResourceDesc resDesc ;
struct cudaTextureDesc texDesc ;
memset( & resDesc , 0 , sizeof(resDesc) );
memset( & texDesc , 0 , sizeof(texDesc) );
resDesc.resType = cudaResourceTypeLinear ;
resDesc.res.linear.desc = desc ;
resDesc.res.linear.sizeInBytes = alloc_size ;
resDesc.res.linear.devPtr = alloc_ptr ;
CUDA_SAFE_CALL( cudaCreateTextureObject( & m_tex_obj , & resDesc, & texDesc, NULL) );
cuda_device_synchronize();
}
TextureAttribute::~TextureAttribute()
{
if (m_tex_obj) {
cudaDestroyTextureObject( m_tex_obj );
}
}
/*--------------------------------------------------------------------------*/
void * CudaMallocAllocator::allocate( size_t size )
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMalloc( &ptr, size ) );
return ptr;
}
void CudaMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFree( ptr ) );
} catch(...) {}
}
void * CudaMallocAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = old_ptr;
if (old_size != new_size) {
ptr = allocate( new_size );
size_t copy_size = old_size < new_size ? old_size : new_size;
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );
deallocate( old_ptr, old_size );
}
return ptr;
}
/*--------------------------------------------------------------------------*/
void * CudaUVMAllocator::allocate( size_t size )
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION )
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, size, cudaMemAttachGlobal ) );
return ptr;
#else
throw_runtime_exception( "CUDA VERSION does not support UVM" );
return NULL;
#endif
}
void CudaUVMAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFree( ptr ) );
} catch(...) {}
}
void * CudaUVMAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = old_ptr;
if (old_size != new_size) {
ptr = allocate( new_size );
size_t copy_size = old_size < new_size ? old_size : new_size;
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );
deallocate( old_ptr, old_size );
}
return ptr;
}
/*--------------------------------------------------------------------------*/
void * CudaHostAllocator::allocate( size_t size )
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaHostAlloc( &ptr , size , cudaHostAllocDefault ) );
return ptr;
}
void CudaHostAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFreeHost( ptr ) );
} catch(...) {}
}
void * CudaHostAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
void * ptr = old_ptr;
if (old_size != new_size) {
ptr = allocate( new_size );
size_t copy_size = old_size < new_size ? old_size : new_size;
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyHostToHost ) );
deallocate( old_ptr, old_size );
}
return ptr;
}
/*--------------------------------------------------------------------------*/
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA

View File

@ -1,187 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
#define KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
namespace Kokkos { namespace Impl {
// Cuda 5.0 <texture_types.h> defines 'cudaTextureObject_t'
// to be an 'unsigned long long'. This chould change with
// future version of Cuda and this typedef would have to
// change accordingly.
#if defined( CUDA_VERSION ) && ( 5000 <= CUDA_VERSION )
typedef enable_if<
sizeof(::cudaTextureObject_t) == sizeof(const void *) ,
::cudaTextureObject_t >::type cuda_texture_object_type ;
#else
typedef const void * cuda_texture_object_type ;
#endif
struct TextureAttribute : public AllocatorAttributeBase
{
cuda_texture_object_type m_tex_obj ;
TextureAttribute( void * const alloc_ptr
, size_t alloc_size
, cudaChannelFormatDesc const & desc
);
~TextureAttribute();
};
/// class CudaUnmanagedAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedAllocator
{
static const char * name()
{
return "Cuda Unmanaged Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedAllocator
/// does nothing when deallocate(ptr,size) is called
struct CudaUnmanagedUVMAllocator
{
static const char * name()
{
return "Cuda Unmanaged UVM Allocator";
}
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedHostAllocator
/// does nothing when deallocate(ptr,size) is called
class CudaUnmanagedHostAllocator
{
public:
static const char * name()
{
return "Cuda Unmanaged Host Allocator";
}
// Unmanaged deallocate does nothing
static void deallocate(void * /*ptr*/, size_t /*size*/) {}
};
/// class CudaMallocAllocator
class CudaMallocAllocator
{
public:
static const char * name()
{
return "Cuda Malloc Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaUVMAllocator
class CudaUVMAllocator
{
public:
static const char * name()
{
return "Cuda UVM Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
static bool support_texture_binding() { return true; }
};
/// class CudaHostAllocator
class CudaHostAllocator
{
public:
static const char * name()
{
return "Cuda Host Allocator";
}
static void* allocate(size_t size);
static void deallocate(void * ptr, size_t);
static void * reallocate(void * old_ptr, size_t old_size, size_t new_size);
};
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_CUDA_BASIC_ALLOCATORS_HPP

View File

@ -1,69 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_ERROR_HPP
#define KOKKOS_CUDA_ERROR_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
namespace Kokkos { namespace Impl {
void cuda_device_synchronize();
void cuda_internal_error_throw( cudaError e , const char * name, const char * file = NULL, const int line = 0 );
inline void cuda_internal_safe_call( cudaError e , const char * name, const char * file = NULL, const int line = 0)
{
if ( cudaSuccess != e ) { cuda_internal_error_throw( e , name, file, line ); }
}
#define CUDA_SAFE_CALL( call ) \
Kokkos::Impl::cuda_internal_safe_call( call , #call, __FILE__, __LINE__ )
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_CUDA_ERROR_HPP

View File

@ -1,678 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/*--------------------------------------------------------------------------*/
/* Kokkos interfaces */
#include <Kokkos_Core.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/* Standard 'C' libraries */
#include <stdlib.h>
/* Standard 'C++' libraries */
#include <vector>
#include <iostream>
#include <sstream>
#include <string>
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
__device__ __constant__
Kokkos::Impl::CudaTraits::ConstantGlobalBufferType
kokkos_impl_cuda_constant_memory_buffer ;
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace {
__global__
void query_cuda_kernel_arch( int * d_arch )
{
#if defined( __CUDA_ARCH__ )
*d_arch = __CUDA_ARCH__ ;
#else
*d_arch = 0 ;
#endif
}
/** Query what compute capability is actually launched to the device: */
int cuda_kernel_arch()
{
int * d_arch = 0 ;
cudaMalloc( (void **) & d_arch , sizeof(int) );
query_cuda_kernel_arch<<<1,1>>>( d_arch );
int arch = 0 ;
cudaMemcpy( & arch , d_arch , sizeof(int) , cudaMemcpyDefault );
cudaFree( d_arch );
return arch ;
}
bool cuda_launch_blocking()
{
const char * env = getenv("CUDA_LAUNCH_BLOCKING");
if (env == 0) return false;
return atoi(env);
}
}
void cuda_device_synchronize()
{
// static const bool launch_blocking = cuda_launch_blocking();
// if (!launch_blocking) {
CUDA_SAFE_CALL( cudaDeviceSynchronize() );
// }
}
void cuda_internal_error_throw( cudaError e , const char * name, const char * file, const int line )
{
std::ostringstream out ;
out << name << " error( " << cudaGetErrorName(e) << "): " << cudaGetErrorString(e);
if (file) {
out << " " << file << ":" << line;
}
throw_runtime_exception( out.str() );
}
//----------------------------------------------------------------------------
// Some significant cuda device properties:
//
// cudaDeviceProp::name : Text label for device
// cudaDeviceProp::major : Device major number
// cudaDeviceProp::minor : Device minor number
// cudaDeviceProp::warpSize : number of threads per warp
// cudaDeviceProp::multiProcessorCount : number of multiprocessors
// cudaDeviceProp::sharedMemPerBlock : capacity of shared memory per block
// cudaDeviceProp::totalConstMem : capacity of constant memory
// cudaDeviceProp::totalGlobalMem : capacity of global memory
// cudaDeviceProp::maxGridSize[3] : maximum grid size
//
// Section 4.4.2.4 of the CUDA Toolkit Reference Manual
//
// struct cudaDeviceProp {
// char name[256];
// size_t totalGlobalMem;
// size_t sharedMemPerBlock;
// int regsPerBlock;
// int warpSize;
// size_t memPitch;
// int maxThreadsPerBlock;
// int maxThreadsDim[3];
// int maxGridSize[3];
// size_t totalConstMem;
// int major;
// int minor;
// int clockRate;
// size_t textureAlignment;
// int deviceOverlap;
// int multiProcessorCount;
// int kernelExecTimeoutEnabled;
// int integrated;
// int canMapHostMemory;
// int computeMode;
// int concurrentKernels;
// int ECCEnabled;
// int pciBusID;
// int pciDeviceID;
// int tccDriver;
// int asyncEngineCount;
// int unifiedAddressing;
// int memoryClockRate;
// int memoryBusWidth;
// int l2CacheSize;
// int maxThreadsPerMultiProcessor;
// };
namespace {
class CudaInternalDevices {
public:
enum { MAXIMUM_DEVICE_COUNT = 8 };
struct cudaDeviceProp m_cudaProp[ MAXIMUM_DEVICE_COUNT ] ;
int m_cudaDevCount ;
CudaInternalDevices();
static const CudaInternalDevices & singleton();
};
CudaInternalDevices::CudaInternalDevices()
{
// See 'cudaSetDeviceFlags' for host-device thread interaction
// Section 4.4.2.6 of the CUDA Toolkit Reference Manual
CUDA_SAFE_CALL (cudaGetDeviceCount( & m_cudaDevCount ) );
for ( int i = 0 ; i < m_cudaDevCount ; ++i ) {
CUDA_SAFE_CALL( cudaGetDeviceProperties( m_cudaProp + i , i ) );
}
}
const CudaInternalDevices & CudaInternalDevices::singleton()
{
static CudaInternalDevices self ; return self ;
}
}
//----------------------------------------------------------------------------
class CudaInternal {
private:
CudaInternal( const CudaInternal & );
CudaInternal & operator = ( const CudaInternal & );
AllocationTracker m_scratchFlagsTracker;
AllocationTracker m_scratchSpaceTracker;
AllocationTracker m_scratchUnifiedTracker;
public:
typedef Cuda::size_type size_type ;
int m_cudaDev ;
int m_cudaArch ;
unsigned m_maxWarpCount ;
unsigned m_maxBlock ;
unsigned m_maxSharedWords ;
size_type m_scratchSpaceCount ;
size_type m_scratchFlagsCount ;
size_type m_scratchUnifiedCount ;
size_type m_scratchUnifiedSupported ;
size_type m_streamCount ;
size_type * m_scratchSpace ;
size_type * m_scratchFlags ;
size_type * m_scratchUnified ;
cudaStream_t * m_stream ;
static CudaInternal & singleton();
int verify_is_initialized( const char * const label ) const ;
int is_initialized() const
{ return 0 != m_scratchSpace && 0 != m_scratchFlags ; }
void initialize( int cuda_device_id , int stream_count );
void finalize();
void print_configuration( std::ostream & ) const ;
~CudaInternal();
CudaInternal()
: m_cudaDev( -1 )
, m_cudaArch( -1 )
, m_maxWarpCount( 0 )
, m_maxBlock( 0 )
, m_maxSharedWords( 0 )
, m_scratchSpaceCount( 0 )
, m_scratchFlagsCount( 0 )
, m_scratchUnifiedCount( 0 )
, m_scratchUnifiedSupported( 0 )
, m_streamCount( 0 )
, m_scratchSpace( 0 )
, m_scratchFlags( 0 )
, m_scratchUnified( 0 )
, m_stream( 0 )
{}
size_type * scratch_space( const size_type size );
size_type * scratch_flags( const size_type size );
size_type * scratch_unified( const size_type size );
};
//----------------------------------------------------------------------------
void CudaInternal::print_configuration( std::ostream & s ) const
{
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
#if defined( KOKKOS_HAVE_CUDA )
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
#endif
#if defined( CUDA_VERSION )
s << "macro CUDA_VERSION = " << CUDA_VERSION
<< " = version " << CUDA_VERSION / 1000
<< "." << ( CUDA_VERSION % 1000 ) / 10
<< std::endl ;
#endif
for ( int i = 0 ; i < dev_info.m_cudaDevCount ; ++i ) {
s << "Kokkos::Cuda[ " << i << " ] "
<< dev_info.m_cudaProp[i].name
<< " capability " << dev_info.m_cudaProp[i].major << "." << dev_info.m_cudaProp[i].minor
<< ", Total Global Memory: " << human_memory_size(dev_info.m_cudaProp[i].totalGlobalMem)
<< ", Shared Memory per Block: " << human_memory_size(dev_info.m_cudaProp[i].sharedMemPerBlock);
if ( m_cudaDev == i ) s << " : Selected" ;
s << std::endl ;
}
}
//----------------------------------------------------------------------------
CudaInternal::~CudaInternal()
{
if ( m_stream ||
m_scratchSpace ||
m_scratchFlags ||
m_scratchUnified ) {
std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()"
<< std::endl ;
std::cerr.flush();
}
m_cudaDev = -1 ;
m_cudaArch = -1 ;
m_maxWarpCount = 0 ;
m_maxBlock = 0 ;
m_maxSharedWords = 0 ;
m_scratchSpaceCount = 0 ;
m_scratchFlagsCount = 0 ;
m_scratchUnifiedCount = 0 ;
m_scratchUnifiedSupported = 0 ;
m_streamCount = 0 ;
m_scratchSpace = 0 ;
m_scratchFlags = 0 ;
m_scratchUnified = 0 ;
m_stream = 0 ;
}
int CudaInternal::verify_is_initialized( const char * const label ) const
{
if ( m_cudaDev < 0 ) {
std::cerr << "Kokkos::Cuda::" << label << " : ERROR device not initialized" << std::endl ;
}
return 0 <= m_cudaDev ;
}
CudaInternal & CudaInternal::singleton()
{
static CudaInternal self ;
return self ;
}
void CudaInternal::initialize( int cuda_device_id , int stream_count )
{
enum { WordSize = sizeof(size_type) };
if ( ! HostSpace::execution_space::is_initialized() ) {
const std::string msg("Cuda::initialize ERROR : HostSpace::execution_space is not initialized");
throw_runtime_exception( msg );
}
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ;
const bool ok_id = 0 <= cuda_device_id &&
cuda_device_id < dev_info.m_cudaDevCount ;
// Need device capability 2.0 or better
const bool ok_dev = ok_id &&
( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major &&
0 <= dev_info.m_cudaProp[ cuda_device_id ].minor );
if ( ok_init && ok_dev ) {
const struct cudaDeviceProp & cudaProp =
dev_info.m_cudaProp[ cuda_device_id ];
m_cudaDev = cuda_device_id ;
CUDA_SAFE_CALL( cudaSetDevice( m_cudaDev ) );
CUDA_SAFE_CALL( cudaDeviceReset() );
Kokkos::Impl::cuda_device_synchronize();
// Query what compute capability architecture a kernel executes:
m_cudaArch = cuda_kernel_arch();
if ( m_cudaArch != cudaProp.major * 100 + cudaProp.minor * 10 ) {
std::cerr << "Kokkos::Cuda::initialize WARNING: running kernels compiled for compute capability "
<< ( m_cudaArch / 100 ) << "." << ( ( m_cudaArch % 100 ) / 10 )
<< " on device with compute capability "
<< cudaProp.major << "." << cudaProp.minor
<< " , this will likely reduce potential performance."
<< std::endl ;
}
//----------------------------------
// Maximum number of warps,
// at most one warp per thread in a warp for reduction.
// HCE 2012-February :
// Found bug in CUDA 4.1 that sometimes a kernel launch would fail
// if the thread count == 1024 and a functor is passed to the kernel.
// Copying the kernel to constant memory and then launching with
// thread count == 1024 would work fine.
//
// HCE 2012-October :
// All compute capabilities support at least 16 warps (512 threads).
// However, we have found that 8 warps typically gives better performance.
m_maxWarpCount = 8 ;
// m_maxWarpCount = cudaProp.maxThreadsPerBlock / Impl::CudaTraits::WarpSize ;
if ( Impl::CudaTraits::WarpSize < m_maxWarpCount ) {
m_maxWarpCount = Impl::CudaTraits::WarpSize ;
}
m_maxSharedWords = cudaProp.sharedMemPerBlock / WordSize ;
//----------------------------------
// Maximum number of blocks:
m_maxBlock = m_cudaArch < 300 ? 65535 : cudaProp.maxGridSize[0] ;
//----------------------------------
m_scratchUnifiedSupported = cudaProp.unifiedAddressing ;
if ( ! m_scratchUnifiedSupported ) {
std::cout << "Kokkos::Cuda device "
<< cudaProp.name << " capability "
<< cudaProp.major << "." << cudaProp.minor
<< " does not support unified virtual address space"
<< std::endl ;
}
//----------------------------------
// Multiblock reduction uses scratch flags for counters
// and scratch space for partial reduction values.
// Allocate some initial space. This will grow as needed.
{
const unsigned reduce_block_count = m_maxWarpCount * Impl::CudaTraits::WarpSize ;
(void) scratch_unified( 16 * sizeof(size_type) );
(void) scratch_flags( reduce_block_count * 2 * sizeof(size_type) );
(void) scratch_space( reduce_block_count * 16 * sizeof(size_type) );
}
//----------------------------------
if ( stream_count ) {
m_stream = (cudaStream_t*) ::malloc( stream_count * sizeof(cudaStream_t) );
m_streamCount = stream_count ;
for ( size_type i = 0 ; i < m_streamCount ; ++i ) m_stream[i] = 0 ;
}
}
else {
std::ostringstream msg ;
msg << "Kokkos::Cuda::initialize(" << cuda_device_id << ") FAILED" ;
if ( ! ok_init ) {
msg << " : Already initialized" ;
}
if ( ! ok_id ) {
msg << " : Device identifier out of range "
<< "[0.." << dev_info.m_cudaDevCount << "]" ;
}
else if ( ! ok_dev ) {
msg << " : Device " ;
msg << dev_info.m_cudaProp[ cuda_device_id ].major ;
msg << "." ;
msg << dev_info.m_cudaProp[ cuda_device_id ].minor ;
msg << " has insufficient capability, required 2.0 or better" ;
}
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
// Init the array for used for arbitrarily sized atomics
Impl::init_lock_array_cuda_space();
}
//----------------------------------------------------------------------------
typedef Cuda::size_type ScratchGrain[ Impl::CudaTraits::WarpSize ] ;
enum { sizeScratchGrain = sizeof(ScratchGrain) };
Cuda::size_type *
CudaInternal::scratch_flags( const Cuda::size_type size )
{
if ( verify_is_initialized("scratch_flags") && m_scratchFlagsCount * sizeScratchGrain < size ) {
m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
m_scratchFlagsTracker = CudaSpace::allocate_and_track( std::string("InternalScratchFlags") , sizeof( ScratchGrain ) * m_scratchFlagsCount );
m_scratchFlags = reinterpret_cast<size_type *>(m_scratchFlagsTracker.alloc_ptr());
CUDA_SAFE_CALL( cudaMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain ) );
}
return m_scratchFlags ;
}
Cuda::size_type *
CudaInternal::scratch_space( const Cuda::size_type size )
{
if ( verify_is_initialized("scratch_space") && m_scratchSpaceCount * sizeScratchGrain < size ) {
m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
m_scratchSpaceTracker = CudaSpace::allocate_and_track( std::string("InternalScratchSpace") , sizeof( ScratchGrain ) * m_scratchSpaceCount );
m_scratchSpace = reinterpret_cast<size_type *>(m_scratchSpaceTracker.alloc_ptr());
}
return m_scratchSpace ;
}
Cuda::size_type *
CudaInternal::scratch_unified( const Cuda::size_type size )
{
if ( verify_is_initialized("scratch_unified") &&
m_scratchUnifiedSupported && m_scratchUnifiedCount * sizeScratchGrain < size ) {
m_scratchUnifiedCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;
m_scratchUnifiedTracker = CudaHostPinnedSpace::allocate_and_track( std::string("InternalScratchUnified") , sizeof( ScratchGrain ) * m_scratchUnifiedCount );
m_scratchUnified = reinterpret_cast<size_type *>( m_scratchUnifiedTracker.alloc_ptr() );
}
return m_scratchUnified ;
}
//----------------------------------------------------------------------------
void CudaInternal::finalize()
{
if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) {
lock_array_cuda_space_ptr(true);
if ( m_stream ) {
for ( size_type i = 1 ; i < m_streamCount ; ++i ) {
cudaStreamDestroy( m_stream[i] );
m_stream[i] = 0 ;
}
::free( m_stream );
}
m_scratchSpaceTracker.clear();
m_scratchFlagsTracker.clear();
m_scratchUnifiedTracker.clear();
m_cudaDev = -1 ;
m_maxWarpCount = 0 ;
m_maxBlock = 0 ;
m_maxSharedWords = 0 ;
m_scratchSpaceCount = 0 ;
m_scratchFlagsCount = 0 ;
m_scratchUnifiedCount = 0 ;
m_streamCount = 0 ;
m_scratchSpace = 0 ;
m_scratchFlags = 0 ;
m_scratchUnified = 0 ;
m_stream = 0 ;
}
}
//----------------------------------------------------------------------------
Cuda::size_type cuda_internal_maximum_warp_count()
{ return CudaInternal::singleton().m_maxWarpCount ; }
Cuda::size_type cuda_internal_maximum_grid_count()
{ return CudaInternal::singleton().m_maxBlock ; }
Cuda::size_type cuda_internal_maximum_shared_words()
{ return CudaInternal::singleton().m_maxSharedWords ; }
Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size )
{ return CudaInternal::singleton().scratch_space( size ); }
Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size )
{ return CudaInternal::singleton().scratch_flags( size ); }
Cuda::size_type * cuda_internal_scratch_unified( const Cuda::size_type size )
{ return CudaInternal::singleton().scratch_unified( size ); }
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
Cuda::size_type Cuda::detect_device_count()
{ return Impl::CudaInternalDevices::singleton().m_cudaDevCount ; }
int Cuda::is_initialized()
{ return Impl::CudaInternal::singleton().is_initialized(); }
void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances )
{ Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); }
std::vector<unsigned>
Cuda::detect_device_arch()
{
const Impl::CudaInternalDevices & s = Impl::CudaInternalDevices::singleton();
std::vector<unsigned> output( s.m_cudaDevCount );
for ( int i = 0 ; i < s.m_cudaDevCount ; ++i ) {
output[i] = s.m_cudaProp[i].major * 100 + s.m_cudaProp[i].minor ;
}
return output ;
}
Cuda::size_type Cuda::device_arch()
{
const int dev_id = Impl::CudaInternal::singleton().m_cudaDev ;
int dev_arch = 0 ;
if ( 0 <= dev_id ) {
const struct cudaDeviceProp & cudaProp =
Impl::CudaInternalDevices::singleton().m_cudaProp[ dev_id ] ;
dev_arch = cudaProp.major * 100 + cudaProp.minor ;
}
return dev_arch ;
}
void Cuda::finalize()
{ Impl::CudaInternal::singleton().finalize(); }
Cuda::Cuda()
: m_device( Impl::CudaInternal::singleton().m_cudaDev )
, m_stream( 0 )
{
Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" );
}
Cuda::Cuda( const int instance_id )
: m_device( Impl::CudaInternal::singleton().m_cudaDev )
, m_stream(
Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" )
? Impl::CudaInternal::singleton().m_stream[ instance_id % Impl::CudaInternal::singleton().m_streamCount ]
: 0 )
{}
void Cuda::print_configuration( std::ostream & s , const bool )
{ Impl::CudaInternal::singleton().print_configuration( s ); }
bool Cuda::sleep() { return false ; }
bool Cuda::wake() { return true ; }
void Cuda::fence()
{
Kokkos::Impl::cuda_device_synchronize();
}
} // namespace Kokkos
#endif // KOKKOS_HAVE_CUDA
//----------------------------------------------------------------------------

View File

@ -1,165 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_INTERNAL_HPP
#define KOKKOS_CUDA_INTERNAL_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
namespace Kokkos { namespace Impl {
template<class DriverType>
int cuda_get_max_block_size(const typename DriverType::functor_type & f) {
#if ( CUDA_VERSION < 6050 )
return 256;
#else
bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );
int numBlocks;
if(Large) {
int blockSize=32;
int sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_constant_memory<DriverType>,
blockSize,
sharedmem);
while (blockSize<1024 && numBlocks>0) {
blockSize*=2;
sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_constant_memory<DriverType>,
blockSize,
sharedmem);
}
if(numBlocks>0) return blockSize;
else return blockSize/2;
} else {
int blockSize=32;
int sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_local_memory<DriverType>,
blockSize,
sharedmem);
while (blockSize<1024 && numBlocks>0) {
blockSize*=2;
sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_local_memory<DriverType>,
blockSize,
sharedmem);
}
if(numBlocks>0) return blockSize;
else return blockSize/2;
}
#endif
}
template<class DriverType>
int cuda_get_opt_block_size(const typename DriverType::functor_type & f) {
#if ( CUDA_VERSION < 6050 )
return 256;
#else
bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );
int blockSize=16;
int numBlocks;
int sharedmem;
int maxOccupancy=0;
int bestBlockSize=0;
if(Large) {
while(blockSize<1024) {
blockSize*=2;
//calculate the occupancy with that optBlockSize and check whether its larger than the largest one found so far
sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_constant_memory<DriverType>,
blockSize,
sharedmem);
if(maxOccupancy < numBlocks*blockSize) {
maxOccupancy = numBlocks*blockSize;
bestBlockSize = blockSize;
}
}
} else {
while(blockSize<1024) {
blockSize*=2;
sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type >::value( f , blockSize );
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&numBlocks,
cuda_parallel_launch_local_memory<DriverType>,
blockSize,
sharedmem);
if(maxOccupancy < numBlocks*blockSize) {
maxOccupancy = numBlocks*blockSize;
bestBlockSize = blockSize;
}
}
}
return bestBlockSize;
#endif
}
}} // namespace Kokkos::Impl
#endif // KOKKOS_HAVE_CUDA
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,424 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_REDUCESCAN_HPP
#define KOKKOS_CUDA_REDUCESCAN_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#include <utility>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//Shfl based reductions
/*
* Algorithmic constraints:
* (a) threads with same threadIdx.y have same value
* (b) blockDim.x == power of two
* (c) blockDim.z == 1
*/
template< class ValueType , class JoinOp>
__device__
inline void cuda_intra_warp_reduction( ValueType& result,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
unsigned int shift = 1;
//Reduce over values from threads with different threadIdx.y
while(blockDim.x * shift < 32 ) {
const ValueType tmp = shfl_down(result, blockDim.x*shift,32u);
//Only join if upper thread is active (this allows non power of two for blockDim.y
if(threadIdx.y + shift < max_active_thread)
join(result , tmp);
shift*=2;
}
result = shfl(result,0,32);
}
template< class ValueType , class JoinOp>
__device__
inline void cuda_inter_warp_reduction( ValueType& value,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
#define STEP_WIDTH 4
__shared__ char sh_result[sizeof(ValueType)*STEP_WIDTH];
ValueType* result = (ValueType*) & sh_result;
const unsigned step = 32 / blockDim.x;
unsigned shift = STEP_WIDTH;
const int id = threadIdx.y%step==0?threadIdx.y/step:65000;
if(id < STEP_WIDTH ) {
result[id] = value;
}
__syncthreads();
while (shift<=max_active_thread/step) {
if(shift<=id && shift+STEP_WIDTH>id && threadIdx.x==0) {
join(result[id%STEP_WIDTH],value);
}
__syncthreads();
shift+=STEP_WIDTH;
}
value = result[0];
for(int i = 1; (i*step<=max_active_thread) && i<STEP_WIDTH; i++)
join(value,result[i]);
}
template< class ValueType , class JoinOp>
__device__
inline void cuda_intra_block_reduction( ValueType& value,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
cuda_intra_warp_reduction(value,join,max_active_thread);
cuda_inter_warp_reduction(value,join,max_active_thread);
}
template< class FunctorType , class JoinOp>
__device__
bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , void >::reference_type value,
const JoinOp& join,
Cuda::size_type * const m_scratch_space,
typename FunctorValueTraits< FunctorType , void >::pointer_type const result,
Cuda::size_type * const m_scratch_flags,
const int max_active_thread = blockDim.y) {
typedef typename FunctorValueTraits< FunctorType , void >::pointer_type pointer_type;
typedef typename FunctorValueTraits< FunctorType , void >::value_type value_type;
//Do the intra-block reduction with shfl operations and static shared memory
cuda_intra_block_reduction(value,join,max_active_thread);
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
//One thread in the block writes block result to global scratch_memory
if(id == 0 ) {
pointer_type global = ((pointer_type) m_scratch_space) + blockIdx.x;
*global = value;
}
//One warp of last block performs inter block reduction through loading the block values from global scratch_memory
bool last_block = false;
__syncthreads();
if ( id < 32 ) {
Cuda::size_type count;
//Figure out whether this is the last block
if(id == 0)
count = Kokkos::atomic_fetch_add(m_scratch_flags,1);
count = Kokkos::shfl(count,0,32);
//Last block does the inter block reduction
if( count == gridDim.x - 1) {
//set flag back to zero
if(id == 0)
*m_scratch_flags = 0;
last_block = true;
value = 0;
pointer_type const volatile global = (pointer_type) m_scratch_space ;
//Reduce all global values with splitting work over threads in one warp
const int step_size = blockDim.x*blockDim.y < 32 ? blockDim.x*blockDim.y : 32;
for(int i=id; i<gridDim.x; i+=step_size) {
value_type tmp = global[i];
join(value, tmp);
}
//Perform shfl reductions within the warp only join if contribution is valid (allows gridDim.x non power of two and <32)
if (blockDim.x*blockDim.y > 1) {
value_type tmp = Kokkos::shfl_down(value, 1,32);
if( id + 1 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 2) {
value_type tmp = Kokkos::shfl_down(value, 2,32);
if( id + 2 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 4) {
value_type tmp = Kokkos::shfl_down(value, 4,32);
if( id + 4 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 8) {
value_type tmp = Kokkos::shfl_down(value, 8,32);
if( id + 8 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 16) {
value_type tmp = Kokkos::shfl_down(value, 16,32);
if( id + 16 < gridDim.x )
join(value, tmp);
}
}
}
//The last block has in its thread=0 the global reduction value through "value"
return last_block;
}
//----------------------------------------------------------------------------
// See section B.17 of Cuda C Programming Guide Version 3.2
// for discussion of
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
// function qualifier which could be used to improve performance.
//----------------------------------------------------------------------------
// Maximize shared memory and minimize L1 cache:
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferShared );
// For 2.0 capability: 48 KB shared and 16 KB L1
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/*
* Algorithmic constraints:
* (a) blockDim.y is a power of two
* (b) blockDim.y <= 512
* (c) blockDim.x == blockDim.z == 1
*/
template< bool DoScan , class FunctorType , class ArgTag >
__device__
void cuda_intra_block_reduce_scan( const FunctorType & functor ,
const typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type base_data )
{
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
const unsigned value_count = ValueTraits::value_count( functor );
const unsigned BlockSizeMask = blockDim.y - 1 ;
// Must have power of two thread count
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_intra_block_scan requires power-of-two blockDim"); }
#define BLOCK_REDUCE_STEP( R , TD , S ) \
if ( ! ( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); }
#define BLOCK_SCAN_STEP( TD , N , S ) \
if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); }
const unsigned rtid_intra = threadIdx.y ^ BlockSizeMask ;
const pointer_type tdata_intra = base_data + value_count * threadIdx.y ;
{ // Intra-warp reduction:
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4)
}
__syncthreads(); // Wait for all warps to reduce
{ // Inter-warp reduce-scan by a single warp to avoid extra synchronizations
const unsigned rtid_inter = ( threadIdx.y ^ BlockSizeMask ) << CudaTraits::WarpIndexShift ;
if ( rtid_inter < blockDim.y ) {
const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask );
if ( (1<<5) < BlockSizeMask ) { BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
if ( DoScan ) {
int n = ( rtid_inter & 32 ) ? 32 : (
( rtid_inter & 64 ) ? 64 : (
( rtid_inter & 128 ) ? 128 : (
( rtid_inter & 256 ) ? 256 : 0 )));
if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ;
BLOCK_SCAN_STEP(tdata_inter,n,8)
BLOCK_SCAN_STEP(tdata_inter,n,7)
BLOCK_SCAN_STEP(tdata_inter,n,6)
BLOCK_SCAN_STEP(tdata_inter,n,5)
}
}
}
__syncthreads(); // Wait for inter-warp reduce-scan to complete
if ( DoScan ) {
int n = ( rtid_intra & 1 ) ? 1 : (
( rtid_intra & 2 ) ? 2 : (
( rtid_intra & 4 ) ? 4 : (
( rtid_intra & 8 ) ? 8 : (
( rtid_intra & 16 ) ? 16 : 0 ))));
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,0)
}
#undef BLOCK_SCAN_STEP
#undef BLOCK_REDUCE_STEP
}
//----------------------------------------------------------------------------
/**\brief Input value-per-thread starting at 'shared_data'.
* Reduction value at last thread's location.
*
* If 'DoScan' then write blocks' scan values and block-groups' scan values.
*
* Global reduce result is in the last threads' 'shared_data' location.
*/
template< bool DoScan , class FunctorType , class ArgTag >
__device__
bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
const Cuda::size_type block_id ,
const Cuda::size_type block_count ,
Cuda::size_type * const shared_data ,
Cuda::size_type * const global_data ,
Cuda::size_type * const global_flags )
{
typedef Cuda::size_type size_type ;
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ;
typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
const unsigned BlockSizeMask = blockDim.y - 1 ;
const unsigned BlockSizeShift = power_of_two_if_valid( blockDim.y );
// Must have power of two thread count
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_single_inter_block_reduce_scan requires power-of-two blockDim"); }
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
word_count( ValueTraits::value_size( functor ) / sizeof(size_type) );
// Reduce the accumulation for the entire block.
cuda_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
{
// Write accumulation total to global scratch space.
// Accumulation total is the last thread's data.
size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
size_type * const global = global_data + word_count.value * block_id ;
for ( size_type i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i] ; }
}
// Contributing blocks note that their contribution has been completed via an atomic-increment flag
// If this block is not the last block to contribute to this group then the block is done.
const bool is_last_block =
! __syncthreads_or( threadIdx.y ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) );
if ( is_last_block ) {
const size_type b = ( long(block_count) * long(threadIdx.y) ) >> BlockSizeShift ;
const size_type e = ( long(block_count) * long( threadIdx.y + 1 ) ) >> BlockSizeShift ;
{
void * const shared_ptr = shared_data + word_count.value * threadIdx.y ;
reference_type shared_value = ValueInit::init( functor , shared_ptr );
for ( size_type i = b ; i < e ; ++i ) {
ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i );
}
}
cuda_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
if ( DoScan ) {
size_type * const shared_value = shared_data + word_count.value * ( threadIdx.y ? threadIdx.y - 1 : blockDim.y );
if ( ! threadIdx.y ) { ValueInit::init( functor , shared_value ); }
// Join previous inclusive scan value to each member
for ( size_type i = b ; i < e ; ++i ) {
size_type * const global_value = global_data + word_count.value * i ;
ValueJoin::join( functor , shared_value , global_value );
ValueOps ::copy( functor , global_value , shared_value );
}
}
}
return is_last_block ;
}
// Size in bytes required for inter block reduce or scan
template< bool DoScan , class FunctorType , class ArgTag >
inline
unsigned cuda_single_inter_block_reduce_scan_shmem( const FunctorType & functor , const unsigned BlockSize )
{
return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor );
}
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( __CUDACC__ ) */
#endif /* KOKKOS_CUDA_REDUCESCAN_HPP */

View File

@ -1,298 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_VECTORIZATION_HPP
#define KOKKOS_CUDA_VECTORIZATION_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Kokkos_Cuda.hpp>
namespace Kokkos {
// Shuffle only makes sense on >= Kepler GPUs; it doesn't work on CPUs
// or other GPUs. We provide a generic definition (which is trivial
// and doesn't do what it claims to do) because we don't actually use
// this function unless we are on a suitable GPU, with a suitable
// Scalar type. (For example, in the mat-vec, the "ThreadsPerRow"
// internal parameter depends both on the ExecutionSpace and the Scalar type,
// and it controls whether shfl_down() gets called.)
namespace Impl {
template< typename Scalar >
struct shfl_union {
enum {n = sizeof(Scalar)/4};
float fval[n];
KOKKOS_INLINE_FUNCTION
Scalar value() {
return *(Scalar*) fval;
}
KOKKOS_INLINE_FUNCTION
void operator= (Scalar& value_) {
float* const val_ptr = (float*) &value_;
for(int i=0; i<n ; i++) {
fval[i] = val_ptr[i];
}
}
KOKKOS_INLINE_FUNCTION
void operator= (const Scalar& value_) {
float* const val_ptr = (float*) &value_;
for(int i=0; i<n ; i++) {
fval[i] = val_ptr[i];
}
}
};
}
#ifdef __CUDA_ARCH__
#if (__CUDA_ARCH__ >= 300)
KOKKOS_INLINE_FUNCTION
int shfl(const int &val, const int& srcLane, const int& width ) {
return __shfl(val,srcLane,width);
}
KOKKOS_INLINE_FUNCTION
float shfl(const float &val, const int& srcLane, const int& width ) {
return __shfl(val,srcLane,width);
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type& width
) {
Scalar tmp1 = val;
float tmp = *reinterpret_cast<float*>(&tmp1);
tmp = __shfl(tmp,srcLane,width);
return *reinterpret_cast<Scalar*>(&tmp);
}
KOKKOS_INLINE_FUNCTION
double shfl(const double &val, const int& srcLane, const int& width) {
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl(lo,srcLane,width);
hi = __shfl(hi,srcLane,width);
return __hiloint2double(hi,lo);
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) {
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
lo = __shfl(lo,srcLane,width);
hi = __shfl(hi,srcLane,width);
const double tmp = __hiloint2double(hi,lo);
return *(reinterpret_cast<const Scalar*>(&tmp));
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) > 8) ,int>::type& width) {
Impl::shfl_union<Scalar> s_val;
Impl::shfl_union<Scalar> r_val;
s_val = val;
for(int i = 0; i<s_val.n; i++)
r_val.fval[i] = __shfl(s_val.fval[i],srcLane,width);
return r_val.value();
}
KOKKOS_INLINE_FUNCTION
int shfl_down(const int &val, const int& delta, const int& width) {
return __shfl_down(val,delta,width);
}
KOKKOS_INLINE_FUNCTION
float shfl_down(const float &val, const int& delta, const int& width) {
return __shfl_down(val,delta,width);
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
Scalar tmp1 = val;
float tmp = *reinterpret_cast<float*>(&tmp1);
tmp = __shfl_down(tmp,delta,width);
return *reinterpret_cast<Scalar*>(&tmp);
}
KOKKOS_INLINE_FUNCTION
double shfl_down(const double &val, const int& delta, const int& width) {
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_down(lo,delta,width);
hi = __shfl_down(hi,delta,width);
return __hiloint2double(hi,lo);
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
lo = __shfl_down(lo,delta,width);
hi = __shfl_down(hi,delta,width);
const double tmp = __hiloint2double(hi,lo);
return *(reinterpret_cast<const Scalar*>(&tmp));
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
Impl::shfl_union<Scalar> s_val;
Impl::shfl_union<Scalar> r_val;
s_val = val;
for(int i = 0; i<s_val.n; i++)
r_val.fval[i] = __shfl_down(s_val.fval[i],delta,width);
return r_val.value();
}
KOKKOS_INLINE_FUNCTION
int shfl_up(const int &val, const int& delta, const int& width ) {
return __shfl_up(val,delta,width);
}
KOKKOS_INLINE_FUNCTION
float shfl_up(const float &val, const int& delta, const int& width ) {
return __shfl_up(val,delta,width);
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
Scalar tmp1 = val;
float tmp = *reinterpret_cast<float*>(&tmp1);
tmp = __shfl_up(tmp,delta,width);
return *reinterpret_cast<Scalar*>(&tmp);
}
KOKKOS_INLINE_FUNCTION
double shfl_up(const double &val, const int& delta, const int& width ) {
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_up(lo,delta,width);
hi = __shfl_up(hi,delta,width);
return __hiloint2double(hi,lo);
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
int lo = __double2loint(*reinterpret_cast<const double*>(&val));
int hi = __double2hiint(*reinterpret_cast<const double*>(&val));
lo = __shfl_up(lo,delta,width);
hi = __shfl_up(hi,delta,width);
const double tmp = __hiloint2double(hi,lo);
return *(reinterpret_cast<const Scalar*>(&tmp));
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
Impl::shfl_union<Scalar> s_val;
Impl::shfl_union<Scalar> r_val;
s_val = val;
for(int i = 0; i<s_val.n; i++)
r_val.fval[i] = __shfl_up(s_val.fval[i],delta,width);
return r_val.value();
}
#else
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl from a device with CC<3.0.");
return val;
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
return val;
}
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
return val;
}
#endif
#else
template<typename Scalar>
inline
Scalar shfl(const Scalar &val, const int& srcLane, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl from a device with CC<3.0.");
return val;
}
template<typename Scalar>
inline
Scalar shfl_down(const Scalar &val, const int& delta, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
return val;
}
template<typename Scalar>
inline
Scalar shfl_up(const Scalar &val, const int& delta, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
return val;
}
#endif
}
#endif // KOKKOS_HAVE_CUDA
#endif

View File

@ -1,427 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_VIEW_HPP
#define KOKKOS_CUDA_VIEW_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <cstring>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_CudaSpace.hpp>
#include <Kokkos_View.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template<>
struct AssertShapeBoundsAbort< CudaSpace >
{
KOKKOS_INLINE_FUNCTION
static void apply( const size_t /* rank */ ,
const size_t /* n0 */ , const size_t /* n1 */ ,
const size_t /* n2 */ , const size_t /* n3 */ ,
const size_t /* n4 */ , const size_t /* n5 */ ,
const size_t /* n6 */ , const size_t /* n7 */ ,
const size_t /* arg_rank */ ,
const size_t /* i0 */ , const size_t /* i1 */ ,
const size_t /* i2 */ , const size_t /* i3 */ ,
const size_t /* i4 */ , const size_t /* i5 */ ,
const size_t /* i6 */ , const size_t /* i7 */ )
{
Kokkos::abort("Kokkos::View array bounds violation");
}
};
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
// Via reinterpret_case this can be used to support all scalar types of those sizes.
// Any other scalar type falls back to either normal reads out of global memory,
// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
template< typename ValueType
, class MemorySpace
, class AliasType =
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 4 ) , int ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 8 ) , ::int2 ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 16 ) , ::int4 ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 32 ) , ::float4 ,void
>::type
>::type
>::type
>::type
>
class CudaTextureFetch {
private:
cuda_texture_object_type m_obj ;
const ValueType * m_alloc_ptr ;
int m_offset ;
void attach( const ValueType * const arg_ptr, AllocationTracker const & tracker )
{
typedef char const * const byte;
m_alloc_ptr = reinterpret_cast<ValueType *>(tracker.alloc_ptr());
size_t byte_offset = reinterpret_cast<byte>(arg_ptr) - reinterpret_cast<byte>(m_alloc_ptr);
const bool ok_aligned = 0 == byte_offset % sizeof(ValueType);
const size_t count = tracker.alloc_size() / sizeof(ValueType);
const bool ok_contains = (m_alloc_ptr <= arg_ptr) && (arg_ptr < (m_alloc_ptr + count));
if (ok_aligned && ok_contains) {
if (tracker.attribute() == NULL ) {
MemorySpace::texture_object_attach(
tracker
, sizeof(ValueType)
, cudaCreateChannelDesc< AliasType >()
);
}
m_obj = dynamic_cast<TextureAttribute*>(tracker.attribute())->m_tex_obj;
m_offset = arg_ptr - m_alloc_ptr;
}
else if( !ok_contains ) {
throw_runtime_exception("Error: cannot attach a texture object to a tracker which does not bound the pointer.");
}
else {
throw_runtime_exception("Error: cannot attach a texture object to an incorrectly aligned pointer.");
}
}
public:
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_obj() , m_alloc_ptr() , m_offset() {}
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs )
: m_obj( rhs.m_obj )
, m_alloc_ptr( rhs.m_alloc_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
{
m_obj = rhs.m_obj ;
m_alloc_ptr = rhs.m_alloc_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
KOKKOS_INLINE_FUNCTION explicit
CudaTextureFetch( const ValueType * const arg_ptr, AllocationTracker const & tracker )
: m_obj( 0 ) , m_alloc_ptr(0) , m_offset(0)
{
#if defined( KOKKOS_USE_LDG_INTRINSIC )
m_alloc_ptr(arg_ptr);
#elif defined( __CUDACC__ ) && ! defined( __CUDA_ARCH__ )
if ( arg_ptr != NULL ) {
if ( tracker.is_valid() ) {
attach( arg_ptr, tracker );
}
else {
AllocationTracker found_tracker = AllocationTracker::find<typename MemorySpace::allocator>(arg_ptr);
if ( found_tracker.is_valid() ) {
attach( arg_ptr, found_tracker );
} else {
throw_runtime_exception("Error: cannot attach a texture object to an untracked pointer!");
}
}
}
#endif
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_alloc_ptr + m_offset ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
#if defined( KOKKOS_USE_LDG_INTRINSIC ) && defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_alloc_ptr[i]));
return *(reinterpret_cast<ValueType*> (&v));
#elif defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
return *(reinterpret_cast<ValueType*> (&v));
#else
return m_alloc_ptr[ i + m_offset ];
#endif
}
};
template< typename ValueType, class MemorySpace >
class CudaTextureFetch< const ValueType, MemorySpace, float4 > {
private:
typedef float4 AliasType;
cuda_texture_object_type m_obj ;
const ValueType * m_alloc_ptr ;
int m_offset ;
void attach( const ValueType * const arg_ptr, AllocationTracker const & tracker )
{
typedef char const * const byte;
m_alloc_ptr = reinterpret_cast<ValueType *>(tracker.alloc_ptr());
size_t byte_offset = reinterpret_cast<byte>(arg_ptr) - reinterpret_cast<byte>(m_alloc_ptr);
const bool ok_aligned = 0 == byte_offset % sizeof(ValueType);
const size_t count = tracker.alloc_size() / sizeof(ValueType);
const bool ok_contains = (m_alloc_ptr <= arg_ptr) && (arg_ptr < (m_alloc_ptr + count));
if (ok_aligned && ok_contains) {
if (tracker.attribute() == NULL ) {
MemorySpace::texture_object_attach(
tracker
, sizeof(ValueType)
, cudaCreateChannelDesc< AliasType >()
);
}
m_obj = dynamic_cast<TextureAttribute*>(tracker.attribute())->m_tex_obj;
m_offset = arg_ptr - m_alloc_ptr;
}
else if( !ok_contains ) {
throw_runtime_exception("Error: cannot attach a texture object to a tracker which does not bound the pointer.");
}
else {
throw_runtime_exception("Error: cannot attach a texture object to an incorrectly aligned pointer.");
}
}
public:
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_obj() , m_alloc_ptr() , m_offset() {}
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs )
: m_obj( rhs.m_obj )
, m_alloc_ptr( rhs.m_alloc_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
{
m_obj = rhs.m_obj ;
m_alloc_ptr = rhs.m_alloc_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
KOKKOS_INLINE_FUNCTION explicit
CudaTextureFetch( const ValueType * const arg_ptr, AllocationTracker const & tracker )
: m_obj( 0 ) , m_alloc_ptr(0) , m_offset(0)
{
#if defined( KOKKOS_USE_LDG_INTRINSIC )
m_alloc_ptr(arg_ptr);
#elif defined( __CUDACC__ ) && ! defined( __CUDA_ARCH__ )
if ( arg_ptr != NULL ) {
if ( tracker.is_valid() ) {
attach( arg_ptr, tracker );
}
else {
AllocationTracker found_tracker = AllocationTracker::find<typename MemorySpace::allocator>(arg_ptr);
if ( found_tracker.is_valid() ) {
attach( arg_ptr, found_tracker );
} else {
throw_runtime_exception("Error: cannot attach a texture object to an untracked pointer!");
}
}
}
#endif
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_alloc_ptr + m_offset ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
#if defined( KOKKOS_USE_LDG_INTRINSIC ) && defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_alloc_ptr[i]));
return *(reinterpret_cast<ValueType*> (&v));
#elif defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
union Float4ValueType {
float4 f4[2];
ValueType val;
};
Float4ValueType convert;
convert.f4[0] = tex1Dfetch<AliasType>( m_obj , 2*(i + m_offset) );
convert.f4[1] = tex1Dfetch<AliasType>( m_obj , 2*(i + m_offset)+1 );
return convert.val;
#else
return m_alloc_ptr[ i + m_offset ];
#endif
}
};
template< typename ValueType, class MemorySpace >
class CudaTextureFetch< const ValueType, MemorySpace, void >
{
private:
const ValueType * m_ptr ;
public:
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_ptr(0) {};
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {
}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const ValueType * ptr, const AllocationTracker & ) : m_ptr(ptr) {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs ) : m_ptr(rhs.m_ptr) {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) {
m_ptr = rhs.m_ptr;
return *this ;
}
explicit KOKKOS_INLINE_FUNCTION
CudaTextureFetch( ValueType * const base_view_ptr, AllocationTracker const & /*tracker*/ ) {
m_ptr = base_view_ptr;
}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = (const ValueType* base_view_ptr) {
m_ptr = base_view_ptr;
return *this;
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_ptr ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
return m_ptr[ i ];
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
* if 'const' value type, CudaSpace and random access.
*/
template< class ViewTraits >
class ViewDataHandle< ViewTraits ,
typename enable_if< ( is_same< typename ViewTraits::memory_space,CudaSpace>::value ||
is_same< typename ViewTraits::memory_space,CudaUVMSpace>::value )
&&
is_same<typename ViewTraits::const_value_type,typename ViewTraits::value_type>::value
&&
ViewTraits::memory_traits::RandomAccess
>::type >
{
public:
enum { ReturnTypeIsReference = false };
typedef Impl::CudaTextureFetch< typename ViewTraits::value_type
, typename ViewTraits::memory_space> handle_type;
KOKKOS_INLINE_FUNCTION
static handle_type create_handle( typename ViewTraits::value_type * arg_data_ptr, AllocationTracker const & arg_tracker )
{
return handle_type(arg_data_ptr, arg_tracker);
}
typedef typename ViewTraits::value_type return_type;
};
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif // KOKKOS_HAVE_CUDA
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */

View File

@ -1,119 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_ABORT_HPP
#define KOKKOS_CUDA_ABORT_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include "Kokkos_Macros.hpp"
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
#include <cuda.h>
#if ! defined( CUDA_VERSION ) || ( CUDA_VERSION < 4010 )
#error "Cuda version 4.1 or greater required"
#endif
#if ( __CUDA_ARCH__ < 200 )
#error "Cuda device capability 2.0 or greater required"
#endif
extern "C" {
/* Cuda runtime function, declared in <crt/device_runtime.h>
* Requires capability 2.x or better.
*/
extern __device__ void __assertfail(
const void *message,
const void *file,
unsigned int line,
const void *function,
size_t charsize);
}
namespace Kokkos {
namespace Impl {
__device__ inline
void cuda_abort( const char * const message )
{
#ifndef __APPLE__
const char empty[] = "" ;
__assertfail( (const void *) message ,
(const void *) empty ,
(unsigned int) 0 ,
(const void *) empty ,
sizeof(char) );
#endif
}
} // namespace Impl
} // namespace Kokkos
#else
namespace Kokkos {
namespace Impl {
KOKKOS_INLINE_FUNCTION
void cuda_abort( const char * const ) {}
}
}
#endif /* #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
namespace Kokkos {
__device__ inline
void abort( const char * const message ) { Kokkos::Impl::cuda_abort(message); }
}
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,300 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ARRAY
#define KOKKOS_ARRAY
#include <type_traits>
#include <limits>
namespace Kokkos {
/**\brief Derived from the C++17 'std::array'.
* Dropping the iterator interface.
*/
template< class T = void
, size_t N = ~size_t(0)
, class Proxy = void
>
struct Array {
private:
T m_elem[N];
public:
typedef T & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef T value_type ;
typedef T * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return N ; }
KOKKOS_INLINE_FUNCTION static constexpr bool empty(){ return false ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
reference operator[]( const iType & i )
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
const_reference operator[]( const iType & i ) const
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
KOKKOS_INLINE_FUNCTION pointer data() { return & m_elem[0] ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return & m_elem[0] ; }
~Array() = default ;
Array() = default ;
Array( const Array & ) = default ;
Array & operator = ( const Array & ) = default ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && ) = default ;
// Array & operator = ( Array && ) = default ;
};
template< class T , class Proxy >
struct Array<T,0,Proxy> {
public:
typedef typename std::add_const<T>::type & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef typename std::add_const<T>::type value_type ;
typedef typename std::add_const<T>::type * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION static constexpr size_type size() { return 0 ; }
KOKKOS_INLINE_FUNCTION static constexpr bool empty() { return true ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
value_type operator[]( const iType & )
{
static_assert( std::is_integral<iType>::value , "Must be integer argument" );
return value_type();
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
value_type operator[]( const iType & ) const
{
static_assert( std::is_integral<iType>::value , "Must be integer argument" );
return value_type();
}
KOKKOS_INLINE_FUNCTION pointer data() { return pointer(0) ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return const_pointer(0); }
~Array() = default ;
Array() = default ;
Array( const Array & ) = default ;
Array & operator = ( const Array & ) = default ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && ) = default ;
// Array & operator = ( Array && ) = default ;
};
template<>
struct Array<void,~size_t(0),void>
{
struct contiguous {};
struct strided {};
};
template< class T >
struct Array< T , ~size_t(0) , Array<>::contiguous >
{
private:
T * m_elem ;
size_t m_size ;
public:
typedef T & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef T value_type ;
typedef T * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; }
KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 != m_size ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
reference operator[]( const iType & i )
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
const_reference operator[]( const iType & i ) const
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i];
}
KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
~Array() = default ;
Array() = delete ;
Array( const Array & rhs ) = delete ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && rhs ) = default ;
// Array & operator = ( Array && rhs ) = delete ;
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
template< size_t N , class P >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,N,P> & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
KOKKOS_INLINE_FUNCTION constexpr Array( pointer arg_ptr , size_type arg_size , size_type = 0 )
: m_elem(arg_ptr), m_size(arg_size) {}
};
template< class T >
struct Array< T , ~size_t(0) , Array<>::strided >
{
private:
T * m_elem ;
size_t m_size ;
size_t m_stride ;
public:
typedef T & reference ;
typedef typename std::add_const<T>::type & const_reference ;
typedef size_t size_type ;
typedef ptrdiff_t difference_type ;
typedef T value_type ;
typedef T * pointer ;
typedef typename std::add_const<T>::type * const_pointer ;
KOKKOS_INLINE_FUNCTION constexpr size_type size() const { return m_size ; }
KOKKOS_INLINE_FUNCTION constexpr bool empty() const { return 0 != m_size ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
reference operator[]( const iType & i )
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i*m_stride];
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
const_reference operator[]( const iType & i ) const
{
static_assert( std::is_integral<iType>::value , "Must be integral argument" );
return m_elem[i*m_stride];
}
KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
~Array() = default ;
Array() = delete ;
Array( const Array & ) = delete ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && rhs ) = default ;
// Array & operator = ( Array && rhs ) = delete ;
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
template< size_t N , class P >
KOKKOS_INLINE_FUNCTION
Array & operator = ( const Array<T,N,P> & rhs )
{
const size_t n = std::min( m_size , rhs.size() );
for ( size_t i = 0 ; i < n ; ++i ) m_elem[i] = rhs[i] ;
return *this ;
}
KOKKOS_INLINE_FUNCTION constexpr Array( pointer arg_ptr , size_type arg_size , size_type arg_stride )
: m_elem(arg_ptr), m_size(arg_size), m_stride(arg_stride) {}
};
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ARRAY */

View File

@ -1,285 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Atomic.hpp
/// \brief Atomic functions
///
/// This header file defines prototypes for the following atomic functions:
/// - exchange
/// - compare and exchange
/// - add
///
/// Supported types include:
/// - signed and unsigned 4 and 8 byte integers
/// - float
/// - double
///
/// They are implemented through GCC compatible intrinsics, OpenMP
/// directives and native CUDA intrinsics.
///
/// Including this header file requires one of the following
/// compilers:
/// - NVCC (for CUDA device code only)
/// - GCC (for host code only)
/// - Intel (for host code only)
/// - A compiler that supports OpenMP 3.1 (for host code only)
#ifndef KOKKOS_ATOMIC_HPP
#define KOKKOS_ATOMIC_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_Traits.hpp>
//----------------------------------------------------------------------------
#if defined(_WIN32)
#define KOKKOS_ATOMICS_USE_WINDOWS
#else
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
// Compiling NVIDIA device code, must use Cuda atomics:
#define KOKKOS_ATOMICS_USE_CUDA
#elif ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
! defined( KOKKOS_ATOMICS_USE_OMP31 )
// Compiling for non-Cuda atomic implementation has not been pre-selected.
// Choose the best implementation for the detected compiler.
// Preference: GCC, INTEL, OMP31
#if defined( KOKKOS_COMPILER_GNU ) || \
defined( KOKKOS_COMPILER_CLANG ) || \
( defined ( KOKKOS_COMPILER_NVCC ) && defined ( __GNUC__ ) )
#define KOKKOS_ATOMICS_USE_GCC
#elif defined( KOKKOS_COMPILER_INTEL ) || \
defined( KOKKOS_COMPILER_CRAYC )
#define KOKKOS_ATOMICS_USE_INTEL
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
#define KOKKOS_ATOMICS_USE_OMP31
#else
#error "KOKKOS_ATOMICS_USE : Unsupported compiler"
#endif
#endif /* Not pre-selected atomic implementation */
#endif
//----------------------------------------------------------------------------
// Forward decalaration of functions supporting arbitrary sized atomics
// This is necessary since Kokkos_Atomic.hpp is internally included very early
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
#ifdef KOKKOS_HAVE_CUDA
namespace Kokkos {
namespace Impl {
/// \brief Aquire a lock for the address
///
/// This function tries to aquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully aquired the
/// function returns true. Otherwise it returns false.
__device__ inline
bool lock_address_cuda_space(void* ptr);
/// \brief Release lock for the address
///
/// This function releases the lock for the hash value derived
/// from the provided ptr. This function should only be called
/// after previously successfully aquiring a lock with
/// lock_address.
__device__ inline
void unlock_address_cuda_space(void* ptr);
}
}
#endif
namespace Kokkos {
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_add(volatile T * const dest, const T src);
// Atomic increment
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_increment(volatile T* a);
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_decrement(volatile T* a);
}
#if ! defined(_WIN32)
#include<impl/Kokkos_Atomic_Assembly_X86.hpp>
#endif
namespace Kokkos {
inline
const char * atomic_query_version()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
return "KOKKOS_ATOMICS_USE_CUDA" ;
#elif defined( KOKKOS_ATOMICS_USE_GCC )
return "KOKKOS_ATOMICS_USE_GCC" ;
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
return "KOKKOS_ATOMICS_USE_INTEL" ;
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
return "KOKKOS_ATOMICS_USE_OMP31" ;
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
return "KOKKOS_ATOMICS_USE_WINDOWS";
#endif
}
} // namespace Kokkos
#ifdef _WIN32
#include "impl/Kokkos_Atomic_Windows.hpp"
#else
//#include "impl/Kokkos_Atomic_Assembly_X86.hpp"
//----------------------------------------------------------------------------
// Atomic exchange
//
// template< typename T >
// T atomic_exchange( volatile T* const dest , const T val )
// { T tmp = *dest ; *dest = val ; return tmp ; }
#include "impl/Kokkos_Atomic_Exchange.hpp"
//----------------------------------------------------------------------------
// Atomic compare-and-exchange
//
// template<class T>
// bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
// { bool equal = compare == *dest ; if ( equal ) { *dest = val ; } return equal ; }
#include "impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and add
//
// template<class T>
// T atomic_fetch_add(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest += val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_Add.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and sub
//
// template<class T>
// T atomic_fetch_sub(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest -= val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and or
//
// template<class T>
// T atomic_fetch_or(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest = tmp | val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_Or.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and and
//
// template<class T>
// T atomic_fetch_and(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest = tmp & val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_And.hpp"
#endif /*Not _WIN32*/
//----------------------------------------------------------------------------
// Memory fence
//
// All loads and stores from this thread will be globally consistent before continuing
//
// void memory_fence() {...};
#include "impl/Kokkos_Memory_Fence.hpp"
//----------------------------------------------------------------------------
// Provide volatile_load and safe_load
//
// T volatile_load(T const volatile * const ptr);
//
// T const& safe_load(T const * const ptr);
// XEON PHI
// T safe_load(T const * const ptr
#include "impl/Kokkos_Volatile_Load.hpp"
#ifndef _WIN32
#include "impl/Kokkos_Atomic_Generic.hpp"
#endif
//----------------------------------------------------------------------------
// This atomic-style macro should be an inlined function, not a macro
#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__)
#define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0)
#define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0)
#else
#define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) ((void)0)
#define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) ((void)0)
#endif
//----------------------------------------------------------------------------
#endif /* KOKKOS_ATOMIC_HPP */

View File

@ -1,235 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CORE_HPP
#define KOKKOS_CORE_HPP
//----------------------------------------------------------------------------
// Include the execution space header files for the enabled execution spaces.
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <Kokkos_Cuda.hpp>
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#include <Kokkos_OpenMP.hpp>
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#include <Kokkos_Serial.hpp>
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#include <Kokkos_Threads.hpp>
#endif
#include <Kokkos_Pair.hpp>
#include <Kokkos_Array.hpp>
#include <Kokkos_View.hpp>
#include <Kokkos_Vectorization.hpp>
#include <Kokkos_Atomic.hpp>
#include <Kokkos_hwloc.hpp>
#include <iostream>
#ifdef KOKKOS_HAVE_CXX11
////#include <Kokkos_Complex.hpp>
#endif
//----------------------------------------------------------------------------
namespace Kokkos {
struct InitArguments {
int num_threads;
int num_numa;
int device_id;
InitArguments() {
num_threads = -1;
num_numa = -1;
device_id = -1;
}
};
void initialize(int& narg, char* arg[]);
void initialize(const InitArguments& args = InitArguments());
/** \brief Finalize the spaces that were initialized via Kokkos::initialize */
void finalize();
/** \brief Finalize all known execution spaces */
void finalize_all();
void fence();
}
#ifdef KOKKOS_HAVE_CXX11
namespace Kokkos {
namespace Impl {
// should only by used by kokkos_malloc and kokkos_free
struct MallocHelper
{
static void increment_ref_count( AllocationTracker const & tracker )
{
tracker.increment_ref_count();
}
static void decrement_ref_count( AllocationTracker const & tracker )
{
tracker.decrement_ref_count();
}
};
} // namespace Impl
/* Allocate memory from a memory space.
* The allocation is tracked in Kokkos memory tracking system, so
* leaked memory can be identified.
*/
template< class Arg = DefaultExecutionSpace>
void* kokkos_malloc(const std::string label, size_t count) {
if(count == 0) return NULL;
typedef typename Arg::memory_space MemorySpace;
Impl::AllocationTracker tracker = MemorySpace::allocate_and_track(label,count);;
Impl::MallocHelper::increment_ref_count( tracker );
return tracker.alloc_ptr();
}
template< class Arg = DefaultExecutionSpace>
void* kokkos_malloc(const size_t& count) {
return kokkos_malloc<Arg>("DefaultLabel",count);
}
/* Free memory from a memory space.
*/
template< class Arg = DefaultExecutionSpace>
void kokkos_free(const void* ptr) {
typedef typename Arg::memory_space MemorySpace;
typedef typename MemorySpace::allocator allocator;
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(ptr);
if (tracker.is_valid()) {
Impl::MallocHelper::decrement_ref_count( tracker );
}
}
template< class Arg = DefaultExecutionSpace>
const void* kokkos_realloc(const void* old_ptr, size_t size) {
typedef typename Arg::memory_space MemorySpace;
typedef typename MemorySpace::allocator allocator;
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(old_ptr);
tracker.reallocate(size);
return tracker.alloc_ptr();
}
} // namespace Kokkos
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
inline
void * kokkos_malloc( const size_t arg_alloc_size )
{
typedef typename Space::memory_space MemorySpace ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
RecordHost * const r = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
RecordBase::increment( r );
return r->data();
}
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
inline
void kokkos_free( void * arg_alloc )
{
typedef typename Space::memory_space MemorySpace ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
RecordHost * const r = RecordHost::get_record( arg_alloc );
RecordBase::decrement( r );
}
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
inline
void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size )
{
typedef typename Space::memory_space MemorySpace ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
RecordHost * const r_old = RecordHost::get_record( arg_alloc );
RecordHost * const r_new = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
Kokkos::Impl::DeepCopy<MemorySpace,MemorySpace>( r_new->data() , r_old->data()
, std::min( r_old->size() , r_new->size() ) );
RecordBase::increment( r_new );
RecordBase::decrement( r_old );
return r_new->data();
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -1,170 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CORE_FWD_HPP
#define KOKKOS_CORE_FWD_HPP
//----------------------------------------------------------------------------
// Kokkos_Macros.hpp does introspection on configuration options
// and compiler environment then sets a collection of #define macros.
#include <Kokkos_Macros.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Forward declarations for class inter-relationships
namespace Kokkos {
class HostSpace ; ///< Memory space for main process and CPU execution spaces
#if defined( KOKKOS_HAVE_SERIAL )
class Serial ; ///< Execution space main process on CPU
#endif // defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_HAVE_PTHREAD )
class Threads ; ///< Execution space with pthreads back-end
#endif
#if defined( KOKKOS_HAVE_OPENMP )
class OpenMP ; ///< OpenMP execution space
#endif
#if defined( KOKKOS_HAVE_CUDA )
class CudaSpace ; ///< Memory space on Cuda GPU
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
class Cuda ; ///< Execution space for Cuda GPU
#endif
template<class ExecutionSpace, class MemorySpace>
struct Device;
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Set the default execution space.
/// Define Kokkos::DefaultExecutionSpace as per configuration option
/// or chosen from the enabled execution spaces in the following order:
/// Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Serial
namespace Kokkos {
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
typedef Cuda DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_PTHREAD )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Detect the active execution space and define its memory space.
// This is used to verify whether a running kernel can access
// a given memory space.
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
#else
typedef void ActiveExecutionMemorySpace ;
#endif
template< class ActiveSpace , class MemorySpace >
struct VerifyExecutionCanAccessMemorySpace {
enum {value = 0};
};
template< class Space >
struct VerifyExecutionCanAccessMemorySpace< Space , Space >
{
enum {value = 1};
KOKKOS_INLINE_FUNCTION static void verify(void) {}
KOKKOS_INLINE_FUNCTION static void verify(const void *) {}
};
} // namespace Impl
} // namespace Kokkos
#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR )
#define KOKKOS_RESTRICT_EXECUTION_TO_( DATA_SPACE ) \
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify()
namespace Kokkos {
void fence();
}
#endif /* #ifndef KOKKOS_CORE_FWD_HPP */

View File

@ -1,268 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_HPP
#define KOKKOS_CUDA_HPP
#include <Kokkos_Core_fwd.hpp>
// If CUDA execution space is enabled then use this header file.
#if defined( KOKKOS_HAVE_CUDA )
#include <iosfwd>
#include <vector>
#include <Kokkos_CudaSpace.hpp>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
class CudaExec ;
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/// \class Cuda
/// \brief Kokkos Execution Space that uses CUDA to run on GPUs.
///
/// An "execution space" represents a parallel execution model. It tells Kokkos
/// how to parallelize the execution of kernels in a parallel_for or
/// parallel_reduce. For example, the Threads execution space uses Pthreads or
/// C++11 threads on a CPU, the OpenMP execution space uses the OpenMP language
/// extensions, and the Serial execution space executes "parallel" kernels
/// sequentially. The Cuda execution space uses NVIDIA's CUDA programming
/// model to execute kernels in parallel on GPUs.
class Cuda {
public:
//! \name Type declarations that all Kokkos execution spaces must provide.
//@{
//! Tag this class as a kokkos execution space
typedef Cuda execution_space ;
#if defined( KOKKOS_USE_CUDA_UVM )
//! This execution space's preferred memory space.
typedef CudaUVMSpace memory_space ;
#else
//! This execution space's preferred memory space.
typedef CudaSpace memory_space ;
#endif
//! This execution space preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! The size_type best suited for this execution space.
typedef memory_space::size_type size_type ;
//! This execution space's preferred array layout.
typedef LayoutLeft array_layout ;
//!
typedef ScratchMemorySpace< Cuda > scratch_memory_space ;
//@}
//--------------------------------------------------
//! \name Functions that all Kokkos devices must implement.
//@{
/// \brief True if and only if this method is being called in a
/// thread-parallel function.
KOKKOS_INLINE_FUNCTION static int in_parallel() {
#if defined( __CUDA_ARCH__ )
return true;
#else
return false;
#endif
}
/** \brief Set the device in a "sleep" state.
*
* This function sets the device in a "sleep" state in which it is
* not ready for work. This may consume less resources than if the
* device were in an "awake" state, but it may also take time to
* bring the device from a sleep state to be ready for work.
*
* \return True if the device is in the "sleep" state, else false if
* the device is actively working and could not enter the "sleep"
* state.
*/
static bool sleep();
/// \brief Wake the device from the 'sleep' state so it is ready for work.
///
/// \return True if the device is in the "ready" state, else "false"
/// if the device is actively working (which also means that it's
/// awake).
static bool wake();
/// \brief Wait until all dispatched functors complete.
///
/// The parallel_for or parallel_reduce dispatch of a functor may
/// return asynchronously, before the functor completes. This
/// method does not return until all dispatched functors on this
/// device have completed.
static void fence();
//! Free any resources being consumed by the device.
static void finalize();
//! Has been initialized
static int is_initialized();
//! Print configuration information to the given output stream.
static void print_configuration( std::ostream & , const bool detail = false );
//@}
//--------------------------------------------------
//! \name Cuda space instances
~Cuda() {}
Cuda();
explicit Cuda( const int instance_id );
Cuda( const Cuda & ) = default ;
Cuda( Cuda && ) = default ;
Cuda & operator = ( const Cuda & ) = default ;
Cuda & operator = ( Cuda && ) = default ;
//--------------------------------------------------------------------------
//! \name Device-specific functions
//@{
struct SelectDevice {
int cuda_device_id ;
SelectDevice() : cuda_device_id(0) {}
explicit SelectDevice( int id ) : cuda_device_id( id ) {}
};
//! Initialize, telling the CUDA run-time library which device to use.
static void initialize( const SelectDevice = SelectDevice()
, const size_t num_instances = 1 );
/// \brief Cuda device architecture of the selected device.
///
/// This matches the __CUDA_ARCH__ specification.
static size_type device_arch();
//! Query device count.
static size_type detect_device_count();
/** \brief Detect the available devices and their architecture
* as defined by the __CUDA_ARCH__ specification.
*/
static std::vector<unsigned> detect_device_arch();
cudaStream_t cuda_stream() const { return m_stream ; }
int cuda_device() const { return m_device ; }
//@}
//--------------------------------------------------------------------------
private:
cudaStream_t m_stream ;
int m_device ;
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::CudaSpace
, Kokkos::Cuda::scratch_memory_space
>
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::HostSpace
, Kokkos::Cuda::scratch_memory_space
>
{
enum { value = false };
inline static void verify( void ) { CudaSpace::access_error(); }
inline static void verify( const void * p ) { CudaSpace::access_error(p); }
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#include <Cuda/Kokkos_CudaExec.hpp>
#include <Cuda/Kokkos_Cuda_View.hpp>
#include <KokkosExp_View.hpp>
#include <Cuda/KokkosExp_Cuda_View.hpp>
#include <Cuda/Kokkos_Cuda_Parallel.hpp>
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_HPP */

View File

@ -1,790 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <iosfwd>
#include <typeinfo>
#include <string>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Cuda on-device memory management */
class CudaSpace {
public:
//! Tag this class as a kokkos memory space
typedef CudaSpace memory_space ;
typedef Kokkos::Cuda execution_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
typedef Impl::CudaMallocAllocator allocator;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/*--------------------------------*/
/** \brief Cuda specific function to attached texture object to an allocation.
* Output the texture object, base pointer, and offset from the input pointer.
*/
#if defined( __CUDACC__ )
static void texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
);
#endif
/*--------------------------------*/
CudaSpace();
CudaSpace( const CudaSpace & rhs ) = default ;
CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
~CudaSpace() = default ;
/**\brief Allocate memory in the cuda space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate memory in the cuda space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
/** \brief Error reporting for HostSpace attempt to access CudaSpace */
static void access_error();
static void access_error( const void * const );
private:
int m_device ; ///< Which Cuda device
// friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
};
namespace Impl {
/// \brief Initialize lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
void init_lock_array_cuda_space();
/// \brief Retrieve the pointer to the lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function retrieves the lock array pointer.
/// If the array is not yet allocated it will do so.
int* lock_array_cuda_space_ptr(bool deallocate = false);
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Cuda memory that is accessible to Host execution space
* through Cuda's unified virtual memory (UVM) runtime.
*/
class CudaUVMSpace {
public:
//! Tag this class as a kokkos memory space
typedef CudaUVMSpace memory_space ;
typedef Cuda execution_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
/** \brief If UVM capability is available */
static bool available();
typedef Impl::CudaUVMAllocator allocator;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/** \brief Cuda specific function to attached texture object to an allocation.
* Output the texture object, base pointer, and offset from the input pointer.
*/
#if defined( __CUDACC__ )
static void texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
);
#endif
/*--------------------------------*/
CudaUVMSpace();
CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
~CudaUVMSpace() = default ;
/**\brief Allocate memory in the cuda space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate memory in the cuda space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
private:
int m_device ; ///< Which Cuda device
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Host memory that is accessible to Cuda execution space
* through Cuda's host-pinned memory allocation.
*/
class CudaHostPinnedSpace {
public:
//! Tag this class as a kokkos memory space
/** \brief Memory is in HostSpace so use the HostSpace::execution_space */
typedef HostSpace::execution_space execution_space ;
typedef CudaHostPinnedSpace memory_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
typedef Impl::CudaHostAllocator allocator ;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/*--------------------------------*/
CudaHostPinnedSpace();
CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
~CudaHostPinnedSpace() = default ;
/**\brief Allocate memory in the cuda space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate memory in the cuda space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
void DeepCopyAsyncCuda( void * dst , const void * src , size_t n);
template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda>
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< CudaSpace , HostSpace , Cuda >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< HostSpace , CudaSpace , Cuda >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<class ExecutionSpace> struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopyAsyncCuda (dst,src,n);
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** Running in CudaSpace attempting to access HostSpace: error */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
{
enum { value = false };
KOKKOS_INLINE_FUNCTION static void verify( void )
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
KOKKOS_INLINE_FUNCTION static void verify( const void * )
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
};
/** Running in CudaSpace accessing CudaUVMSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
/** Running in CudaSpace attempting to access an unknown space: error */
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
typename enable_if< ! is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
OtherSpace >
{
enum { value = false };
KOKKOS_INLINE_FUNCTION static void verify( void )
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
KOKKOS_INLINE_FUNCTION static void verify( const void * )
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
};
//----------------------------------------------------------------------------
/** Running in HostSpace attempting to access CudaSpace */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
{
enum { value = false };
inline static void verify( void ) { CudaSpace::access_error(); }
inline static void verify( const void * p ) { CudaSpace::access_error(p); }
};
/** Running in HostSpace accessing CudaUVMSpace is OK */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
{
enum { value = true };
inline static void verify( void ) { }
inline static void verify( const void * ) { }
};
/** Running in HostSpace accessing CudaHostPinnedSpace is OK */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) {}
KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
template<>
class SharedAllocationRecord< Kokkos::CudaSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
static ::cudaTextureObject_t
attach_texture_object( const unsigned sizeof_alias
, void * const alloc_ptr
, const size_t alloc_size );
static RecordBase s_root_record ;
::cudaTextureObject_t m_tex_obj ;
const Kokkos::CudaSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
template< typename AliasType >
inline
::cudaTextureObject_t attach_texture_object()
{
static_assert( ( std::is_same< AliasType , int >::value ||
std::is_same< AliasType , ::int2 >::value ||
std::is_same< AliasType , ::int4 >::value )
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
if ( m_tex_obj == 0 ) {
m_tex_obj = attach_texture_object( sizeof(AliasType)
, (void*) RecordBase::m_alloc_ptr
, RecordBase::m_alloc_size );
}
return m_tex_obj ;
}
template< typename AliasType >
inline
int attach_texture_object_offset( const AliasType * const ptr )
{
// Texture object is attached to the entire allocation range
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
}
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
};
template<>
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
static RecordBase s_root_record ;
::cudaTextureObject_t m_tex_obj ;
const Kokkos::CudaUVMSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
template< typename AliasType >
inline
::cudaTextureObject_t attach_texture_object()
{
static_assert( ( std::is_same< AliasType , int >::value ||
std::is_same< AliasType , ::int2 >::value ||
std::is_same< AliasType , ::int4 >::value )
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
if ( m_tex_obj == 0 ) {
m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( sizeof(AliasType)
, (void*) RecordBase::m_alloc_ptr
, RecordBase::m_alloc_size );
}
return m_tex_obj ;
}
template< typename AliasType >
inline
int attach_texture_object_offset( const AliasType * const ptr )
{
// Texture object is attached to the entire allocation range
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
}
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
};
template<>
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
static void deallocate( RecordBase * );
static RecordBase s_root_record ;
const Kokkos::CudaHostPinnedSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord() : RecordBase(), m_space() {}
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
std::string get_label() const ;
static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
};
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */

Some files were not shown because too many files have changed in this diff Show More