Updating Kokkos library--first deleting old folder

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13921 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
stamoor
2015-08-19 22:12:32 +00:00
parent 27b4742935
commit e2ac7b2352
196 changed files with 0 additions and 68618 deletions

View File

@ -1,40 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -1,40 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -1,318 +0,0 @@
# Default settings common options
# Every variable assigned with ?= below keeps a value that was already set
# (command line, environment or an including makefile); the value given here
# is only the fallback.
# KOKKOS_PATH is assigned with plain '=' and therefore always set by this file.
KOKKOS_PATH=../../lib/kokkos
#Options: OpenMP,Serial,Pthreads,Cuda
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8,AMDAVX
KOKKOS_ARCH ?= ""
#Options: yes,no
KOKKOS_DEBUG ?= "no"
#Options: hwloc,librt
KOKKOS_USE_TPLS ?= ""
#Default settings specific options
#Options: force_uvm,use_ldg,rdc
KOKKOS_CUDA_OPTIONS ?= ""
# Check for general settings
# Each KOKKOS_INTERNAL_* probe below echoes a user variable through grep and
# counts the matching lines with wc -l, so the result is 1 when the keyword
# occurs anywhere in the value (substring match) and 0 otherwise.
# $(strip ...) removes any padding that wc adds around the number.
KOKKOS_CXX_STANDARD ?= "c++11"
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
# KOKKOS_PROFILING has no default in this file; when it is unset both
# profiling probes see an empty string and evaluate to 0.
KOKKOS_INTERNAL_ENABLE_PROFILING_COLLECT_KERNEL_DATA := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "kernel_times" | wc -l))
KOKKOS_INTERNAL_ENABLE_PROFILING_AGGREGATE_MPI := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "aggregate_mpi" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
# Check for external libraries
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
# Check for advanced settings
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
# Check for Kokkos Host Execution Spaces one of which must be on
# The pattern "Pthread" also matches the documented spelling "Pthreads".
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
# Fall back to the Serial host space when neither OpenMP nor Pthreads was
# requested, so that a host execution space is always enabled.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
endif
# Detect the PGI compiler from the banner printed by "$(CXX) --version" and
# pick the matching spellings of the OpenMP and C++11 flags.
# The count is wrapped in $(strip ...) like every other probe in this file:
# some wc implementations pad their output with blanks, and a padded "1"
# would never compare equal to 1 in the ifeq below.
KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version | grep PGI | wc -l))
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -mp
  KOKKOS_INTERNAL_CXX11_FLAG := --c++11
else
  KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
  KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
endif
# Check for other Execution Spaces
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
# Check for Kokkos Architecture settings
# Every probe is 1 when its keyword occurs in KOKKOS_ARCH and 0 otherwise.
#Intel based
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
#NVIDIA based
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
# Number of explicitly versioned NVIDIA architectures that were requested.
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
# No versioned name matched: accept the bare family names, mapping "Maxwell"
# to Maxwell50 and "Kepler" to Kepler35, then recount.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
endif
#ARM based
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
#IBM based
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
#AMD based
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
#Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc ))
#Incompatible flags?
# USE_ARCH_AVX already includes AMDAVX (see its definition above), so AMDAVX
# must not be added a second time here: doing so made KOKKOS_ARCH=AMDAVX on
# its own count as two host architectures and abort the build.
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
$(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
#Generating the list of Flags
# These are the base values; later sections append to them with += depending
# on the enabled devices, architectures and third-party libraries.
# Include path: the current directory plus the core, containers and
# algorithms source trees under KOKKOS_PATH.
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
# No warnings:
KOKKOS_CXXFLAGS =
# INTEL and CLANG warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
# GCC warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered
KOKKOS_LIBS = -lkokkos
# The library search path is the directory make is run from ($(shell pwd)).
KOKKOS_LDFLAGS = -L$(shell pwd)
# Source and header lists start empty and are filled in further down.
KOKKOS_SRC =
KOKKOS_HEADERS =
#Generating the KokkosCore_config.h file
# The candidate configuration is written to KokkosCore_config.tmp while the
# makefile is being parsed. "tmp" is a throwaway variable: each $(shell ...)
# call is evaluated only for its side effect of writing to that file.
# The first echo uses '>' and so truncates the file; all later ones append.
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
tmp := $(shell date >> KokkosCore_config.tmp)
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
endif
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
endif
# Debug builds add -g to compile and link flags (plus -G when Cuda is enabled),
# link against libdl and define the bounds-check and debug macros.
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -G
endif
KOKKOS_CXXFLAGS += -g
KOKKOS_LDFLAGS += -g -ldl
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
endif
# hwloc support reads HWLOC_PATH, which is not given a default in this file.
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
KOKKOS_LIBS += -lhwloc
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
KOKKOS_LIBS += -lrt
endif
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
endif
# force_uvm defines both spellings of the UVM macro.
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += --relocatable-device-code=true
KOKKOS_LDFLAGS += --relocatable-device-code=true
endif
#Add Architecture flags
# Host architecture flags are added to both the compile and the link flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
KOKKOS_CXXFLAGS += -mavx
KOKKOS_LDFLAGS += -mavx
endif
# NOTE(review): -xcore-avx2 looks like an Intel-compiler spelling; confirm it
# is accepted by the other supported compilers.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
KOKKOS_CXXFLAGS += -xcore-avx2
KOKKOS_LDFLAGS += -xcore-avx2
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
KOKKOS_CXXFLAGS += -mmic
KOKKOS_LDFLAGS += -mmic
endif
# GPU architecture flags are only added when the Cuda device is enabled.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
KOKKOS_CXXFLAGS += -arch=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
KOKKOS_CXXFLAGS += -arch=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
KOKKOS_CXXFLAGS += -arch=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
KOKKOS_CXXFLAGS += -arch=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
KOKKOS_CXXFLAGS += -arch=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
KOKKOS_CXXFLAGS += -arch=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
KOKKOS_CXXFLAGS += -arch=sm_53
endif
endif
# Replace KokkosCore_config.h with the freshly generated KokkosCore_config.tmp
# only when the header is missing or when the two files differ in a line
# containing "define". Differences in other lines (such as the date stamp) do
# not touch the header.
# $(wildcard) yields the file name when the header exists and an empty string
# otherwise - the same value "ls" produced - without spawning a shell and
# without the "No such file or directory" message on a fresh build.
KOKKOS_INTERNAL_LS_CONFIG := $(wildcard KokkosCore_config.h)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
  KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
else
  KOKKOS_INTERNAL_NEW_CONFIG := 1
endif
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
  tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
endif
# Collect the headers and sources that are always part of the build.
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
# Cuda backend: extra sources/headers plus the CUDA libraries.
# CUDA_PATH is read here but not given a default in this file.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
KOKKOS_LIBS += -lcudart -lcuda
endif
# Pthreads backend: extra sources/headers plus libpthread.
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_LIBS += -lpthread
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
endif
# OpenMP backend: extra sources/headers plus the OpenMP flag. When Cuda is
# also enabled the flag is passed to the compiler behind -Xcompiler; the link
# flags always receive it directly.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
else
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
# Setting up dependencies
# KokkosCore_config.h is declared as a target with no prerequisites and no
# recipe; the file itself is written while the makefile is parsed.
KokkosCore_config.h:
# Prerequisites shared by every object rule: the generated config header and
# all collected Kokkos headers.
KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)
# Object names derived from the sources; KOKKOS_OBJ_LINK drops the directory
# part, so objects are expected in the directory make runs in.
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
include $(KOKKOS_PATH)/Makefile.targets
# Remove the objects, the generated configuration files and the archive.
kokkos-clean:
rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
# Archive all Kokkos objects into a static library.
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
# Variable that makefiles using Kokkos can list as a link prerequisite.
KOKKOS_LINK_DEPENDS=libkokkos.a

View File

@ -1,50 +0,0 @@
# Explicit compile rules, one per Kokkos object file.
# Each target has no directory part, so the object is produced in the
# directory make runs in, while the source is addressed through KOKKOS_PATH.
# Every object depends on $(KOKKOS_CPP_DEPENDS) and is compiled with
# KOKKOS_CPPFLAGS, KOKKOS_CXXFLAGS and the user's CXXFLAGS; these variables are
# expected to be set by the makefile that includes this one.

# Containers
Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
# Core (always built)
Kokkos_AllocationTracker.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
Kokkos_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
# Cuda backend objects, only declared when the Cuda device is enabled
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
endif
# Pthreads backend objects, only declared when the Pthreads device is enabled
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
endif
# OpenMP backend object, only declared when the OpenMP device is enabled
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
endif

View File

@ -1,97 +0,0 @@
Kokkos implements a programming model in C++ for writing performance portable
applications targeting all major HPC platforms. For that purpose it provides
abstractions for both parallel execution of code and data management.
Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
OpenMP, Pthreads and CUDA as backend programming models.
The core developers of Kokkos are Carter Edwards and Christian Trott
at the Computer Science Research Institute of the Sandia National
Laboratories.
The KokkosP interface and associated tools are developed by the Application
Performance Team and Kokkos core developers at Sandia National Laboratories.
To learn more about Kokkos consider watching one of our presentations:
GTC 2015:
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
and feedback is greatly appreciated.
For questions please send an email to
kokkos-users@software.sandia.gov
For non-public questions send an email to
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
============================================================================
====Requirements============================================================
============================================================================
Primary tested compilers are:
GCC 4.7.2
GCC 5.1.0
Intel 14.0.1
Intel 15.0.1
Clang 3.7.0
Secondary tested compilers are:
CUDA 6.5
CUDA 7.0
Primary tested compilers are passing in release mode
with warnings as errors. We are using the following set
of flags:
GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
============================================================================
====Getting started=========================================================
============================================================================
In the 'example/tutorial' directory you will find step by step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
in the 'example/tutorial' directory. This will build all examples in the
subfolders.
============================================================================
====Running Unit Tests======================================================
============================================================================
To run the unit tests create a build directory and run the following commands
KOKKOS_PATH/generate_makefile.bash
make build-test
make test
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====Install the library=====================================================
============================================================================
To install Kokkos as a library create a build directory and run the following
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
make lib
make install
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====CMakeFiles==============================================================
============================================================================
The CMake files contained in this repository require Tribits and are used
for integration with Trilinos. They do not currently support a standalone
CMake build.

File diff suppressed because it is too large Load Diff

View File

@ -1,496 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SORT_HPP_
#define KOKKOS_SORT_HPP_
#include <Kokkos_Core.hpp>
#include <algorithm>
namespace Kokkos {
namespace SortImpl {

// CopyOp<ValuesViewType, Rank>::copy(dst, i_dst, src, i_src) copies the entry
// with leading index i_src of src to leading index i_dst of dst, covering all
// trailing indices. Rank defaults to ValuesViewType::Rank; specializations
// exist for ranks 1, 2 and 3 only.
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
struct CopyOp;

// Rank 1: a single element per leading index.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,1> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    dst(i_dst) = src(i_src);
  }
};

// Rank 2: one row of dst.dimension_1() elements per leading index.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,2> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    // The extent is read once and converted to the loop index type so the
    // loop condition does not mix signed and unsigned operands.
    const int n1 = static_cast<int>(dst.dimension_1());
    for(int j = 0; j < n1; j++)
      dst(i_dst,j) = src(i_src,j);
  }
};

// Rank 3: a dimension_1() x dimension_2() slab per leading index.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,3> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    // Same signed conversion as the rank-2 case; previously these loops
    // compared an int index directly against the extent.
    const int n1 = static_cast<int>(dst.dimension_1());
    const int n2 = static_cast<int>(dst.dimension_2());
    for(int j = 0; j < n1; j++)
      for(int k = 0; k < n2; k++)
        dst(i_dst,j,k) = src(i_src,j,k);
  }
};

}
template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space,
class SizeType = typename KeyViewType::memory_space::size_type>
class BinSort {
public:
template<class ValuesViewType, class PermuteViewType, class CopyOp>
struct bin_sort_sort_functor {
typedef ExecutionSpace execution_space;
typedef typename ValuesViewType::non_const_type values_view_type;
typedef typename ValuesViewType::const_type const_values_view_type;
Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout,
typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values;
values_view_type sorted_values;
typename PermuteViewType::const_type sort_order;
bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_):
values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
//printf("Sort: %i %i\n",i,sort_order(i));
CopyOp::copy(sorted_values,i,values,sort_order(i));
}
};
typedef ExecutionSpace execution_space;
typedef BinSortOp bin_op_type;
struct bin_count_tag {};
struct bin_offset_tag {};
struct bin_binning_tag {};
struct bin_sort_bins_tag {};
public:
typedef SizeType size_type;
typedef size_type value_type;
typedef Kokkos::View<size_type*, execution_space> offset_type;
typedef Kokkos::View<const int*, execution_space> bin_count_type;
typedef Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout,
typename KeyViewType::memory_space> const_key_view_type;
typedef Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout,
typename KeyViewType::memory_space,
Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type;
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
typedef typename KeyViewType::const_value_type const_key_scalar;
private:
const_key_view_type keys;
const_rnd_key_view_type keys_rnd;
public:
BinSortOp bin_op;
offset_type bin_offsets;
Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic;
bin_count_type bin_count_const;
offset_type sort_order;
bool sort_within_bins;
public:
// Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
bool sort_within_bins_ = false)
:keys(keys_),keys_rnd(keys_), bin_op(bin_op_) {
bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
bin_count_const = bin_count_atomic;
bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
sort_order = offset_type("PermutationVector",keys.dimension_0());
sort_within_bins = sort_within_bins_;
}
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
void create_permute_vector() {
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this);
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
Kokkos::deep_copy(bin_count_atomic,0);
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this);
if(sort_within_bins)
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
}
// Sort a view with respect ot the first dimension using the permutation array
template<class ValuesViewType>
void sort(ValuesViewType values) {
ValuesViewType sorted_values = ValuesViewType("Copy",
values.dimension_0(),
values.dimension_1(),
values.dimension_2(),
values.dimension_3(),
values.dimension_4(),
values.dimension_5(),
values.dimension_6(),
values.dimension_7());
parallel_for(values.dimension_0(),
bin_sort_sort_functor<ValuesViewType, offset_type,
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
deep_copy(values,sorted_values);
}
// Accessor for the permutation vector (sort_order).
KOKKOS_INLINE_FUNCTION
offset_type get_permute_vector() const
{
  return sort_order;
}
// Accessor for the per-bin start offsets (bin_offsets).
KOKKOS_INLINE_FUNCTION
offset_type get_bin_offsets() const
{
  return bin_offsets;
}
// Accessor for the per-bin element counts (bin_count_const).
KOKKOS_INLINE_FUNCTION
bin_count_type get_bin_count() const
{
  return bin_count_const;
}
public:
// Counting kernel: bump the counter of the bin that key i falls into.
KOKKOS_INLINE_FUNCTION
void operator() (const bin_count_tag& tag, const int& i) const {
  const int target_bin = bin_op.bin(keys,i);
  bin_count_atomic(target_bin)++;
}
// Scan kernel: `offset` carries the running sum of the counts of all bins
// before bin i. On the final pass that sum is stored as the start of bin i.
KOKKOS_INLINE_FUNCTION
void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const {
  const value_type start_of_bin = offset;
  offset += bin_count_const(i);
  if(final) {
    bin_offsets(i) = start_of_bin;
  }
}
// Binning kernel: claim the next free slot of key i's bin and record i there.
KOKKOS_INLINE_FUNCTION
void operator() (const bin_binning_tag& tag, const int& i) const {
  const int target_bin = bin_op.bin(keys,i);
  // The post-increment hands out the slot number within the bin.
  const int slot = bin_count_atomic(target_bin)++;
  sort_order(bin_offsets(target_bin) + slot) = i;
}
// Sort kernel: orders the entries of bin i inside sort_order with a bubble
// sort driven by bin_op's comparison operator. One work item handles one bin.
KOKKOS_INLINE_FUNCTION
void operator() (const bin_sort_bins_tag& tag, const int&i ) const {
  // A bin with fewer than two entries is already ordered. Returning here also
  // avoids reading sort_order(bin_offsets(i)) for an empty bin, which lies past
  // the end of sort_order when the empty bin is the last one.
  if(bin_count_const(i) < 2) return;

  const int first = bin_offsets(i);
  int upper_bound = first+bin_count_const(i);
  bool sorted = false;
  while(!sorted) {
    sorted = true;
    // old_idx is the entry currently being bubbled towards the end of the bin.
    int old_idx = sort_order(first);
    int new_idx;
    for(int k=first+1; k<upper_bound; k++) {
      new_idx = sort_order(k);
      if(!bin_op(keys_rnd,old_idx,new_idx)) {
        // Out of order: swap the two neighbours and keep carrying old_idx.
        sort_order(k-1) = new_idx;
        sort_order(k) = old_idx;
        sorted = false;
      } else {
        old_idx = new_idx;
      }
    }
    // The last position of this pass is final; shrink the range.
    upper_bound--;
  }
}
};
namespace SortImpl {
// Binning operator for rank-1 key views: maps each key linearly from
// [min, max] onto the bin indices 0 .. max_bins__.
template<class KeyViewType>
struct DefaultBinOp1D {
  const int max_bins_;    // number of bins (requested count + 1)
  const double mul_;      // scale factor from key distance to bin index
  typename KeyViewType::const_value_type range_;  // max - min
  typename KeyViewType::const_value_type min_;    // smallest key value

  // Construct the operator from the number of bins, the minimum value and the
  // maximum value of the keys.
  DefaultBinOp1D(int max_bins__,
                 typename KeyViewType::const_value_type min,
                 typename KeyViewType::const_value_type max )
    : max_bins_(max_bins__+1),
      mul_(1.0*max_bins__/(max-min)),
      range_(max-min),
      min_(min)
  {}

  // Bin index of key i.
  template<class ViewType>
  KOKKOS_INLINE_FUNCTION
  int bin(ViewType& keys, const int& i) const {
    return static_cast<int>(mul_*(keys(i)-min_));
  }

  // Maximum bin index + 1.
  KOKKOS_INLINE_FUNCTION
  int max_bins() const {
    return max_bins_;
  }

  // Ordering of two keys inside a bin: true when the key at i1 is smaller
  // than the key at i2.
  template<class ViewType, typename iType1, typename iType2>
  KOKKOS_INLINE_FUNCTION
  bool operator()(ViewType& keys, iType1& i1, iType2& i2) const {
    const bool first_is_smaller = keys(i1)<keys(i2);
    return first_is_smaller;
  }
};
// Binning operator for keys with three components (keys(i,0..2)): each
// component is mapped linearly onto its own bin range and the three indices
// are combined into one flat bin index.
template<class KeyViewType>
struct DefaultBinOp3D {
  int max_bins_[3];   // bins per component (requested count + 1)
  double mul_[3];     // per-component scale factor from key distance to bin index
  typename KeyViewType::non_const_value_type range_[3];  // max - min per component
  typename KeyViewType::non_const_value_type min_[3];    // smallest value per component

  // Construct the operator from per-component bin counts, minima and maxima
  // (arrays of three entries each).
  DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
                 typename KeyViewType::const_value_type max[] )
  {
    for(int d=0; d<3; ++d) {
      max_bins_[d] = max_bins__[d]+1;
      mul_[d] = 1.0*max_bins__[d]/(max[d]-min[d]);
      range_[d] = max[d]-min[d];
      min_[d] = min[d];
    }
  }

  // Flat bin index of key i: component 0 varies slowest, component 2 fastest.
  template<class ViewType>
  KOKKOS_INLINE_FUNCTION
  int bin(ViewType& keys, const int& i) const {
    const int b0 = int(mul_[0]*(keys(i,0)-min_[0]));
    const int b1 = int(mul_[1]*(keys(i,1)-min_[1]));
    const int b2 = int(mul_[2]*(keys(i,2)-min_[2]));
    return (b0*max_bins_[1] + b1)*max_bins_[2] + b2;
  }

  // Total number of flat bins.
  KOKKOS_INLINE_FUNCTION
  int max_bins() const {
    return max_bins_[0]*max_bins_[1]*max_bins_[2];
  }

  // Lexicographic comparison of two keys: true when key i1 is greater than
  // key i2 in the first component in which they differ.
  // A later component is consulted only when all earlier components of the
  // SAME index are equal (the previous code compared component 1 of i1 with
  // component 2 of i2 before looking at component 2).
  template<class ViewType, typename iType1, typename iType2>
  KOKKOS_INLINE_FUNCTION
  bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const {
    for(int d=0; d<3; ++d) {
      if (keys(i1,d)>keys(i2,d)) return true;
      if (!(keys(i1,d)==keys(i2,d))) return false;
    }
    return false;
  }
};
// Reduction value that tracks the minimum and maximum of the values folded
// into it with operator+=. `init` records whether any value has been seen;
// until then min and max carry no information.
template<typename Scalar>
struct min_max {
  Scalar min;
  Scalar max;
  bool init;

  // Empty state: nothing folded in yet.
  KOKKOS_INLINE_FUNCTION
  min_max() : min(0), max(0), init(false) {}

  KOKKOS_INLINE_FUNCTION
  min_max (const min_max& val) : min(val.min), max(val.max), init(val.init) {}

  KOKKOS_INLINE_FUNCTION
  min_max operator = (const min_max& val) {
    init = val.init;
    max = val.max;
    min = val.min;
    return *this;
  }

  // Fold a single value into the running extrema.
  KOKKOS_INLINE_FUNCTION
  void operator+= (const Scalar& val) {
    if(!init) {
      // First value seen: it is both minimum and maximum.
      min = val;
      max = val;
      init = true;
      return;
    }
    min = min<val?min:val;
    max = max>val?max:val;
  }

  // Merge another partial result; an uninitialized operand contributes nothing.
  KOKKOS_INLINE_FUNCTION
  void operator+= (const min_max& val) {
    if(!val.init) return;
    if(init) {
      min = min<val.min?min:val.min;
      max = max>val.max?max:val.max;
    } else {
      min = val.min;
      max = val.max;
      init = true;
    }
  }

  // Volatile overload of the single-value fold.
  KOKKOS_INLINE_FUNCTION
  void operator+= (volatile const Scalar& val) volatile {
    if(!init) {
      min = val;
      max = val;
      init = true;
      return;
    }
    min = min<val?min:val;
    max = max>val?max:val;
  }

  // Volatile overload of the merge.
  KOKKOS_INLINE_FUNCTION
  void operator+= (volatile const min_max& val) volatile {
    if(!val.init) return;
    if(init) {
      min = min<val.min?min:val.min;
      max = max>val.max?max:val.max;
    } else {
      min = val.min;
      max = val.max;
      init = true;
    }
  }
};
// Reduction functor that folds every entry of a view into a min_max value.
template<class ViewType>
struct min_max_functor {
  typedef typename ViewType::execution_space execution_space;
  typedef min_max<typename ViewType::non_const_value_type> value_type;

  ViewType view;

  min_max_functor (const ViewType source_view) : view(source_view) {}

  // Fold entry i into the running extrema.
  KOKKOS_INLINE_FUNCTION
  void operator()(const size_t& i, value_type& extrema) const {
    extrema += view(i);
  }
};
// Sort the view with std::sort when that is possible: the view must live in
// HostSpace, have rank 1 and a stride of 1 in its first dimension.
// Returns true if the view was sorted here, false if nothing was done.
template<class ViewType>
bool try_std_sort(ViewType view) {
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
  size_t stride[8];
  view.stride(stride);
#else
  size_t stride[8] = { view.stride_0()
                     , view.stride_1()
                     , view.stride_2()
                     , view.stride_3()
                     , view.stride_4()
                     , view.stride_5()
                     , view.stride_6()
                     , view.stride_7()
                     };
#endif
  const bool possible =
      Impl::is_same<typename ViewType::memory_space, HostSpace>::value
      && (ViewType::Rank == 1)
      && (stride[0] == 1);
  if(!possible) {
    return false;
  }
  std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0());
  return true;
}
}
// Sort a rank-1 view in place.
//   view                    the data to sort
//   always_use_kokkos_sort  if false (default), std::sort is tried first and
//                           used whenever SortImpl::try_std_sort accepts the view
// Otherwise the view is sorted with a BinSort over view.dimension_0()/2 bins
// spanning the [min, max] range of the data, with sorting inside each bin.
template<class ViewType>
void sort(ViewType view, bool always_use_kokkos_sort = false) {
  if(!always_use_kokkos_sort) {
    if(SortImpl::try_std_sort(view)) return;
  }

  // Zero or one entry: already sorted.
  if(view.dimension_0() < 2) return;

  typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
  SortImpl::min_max<typename ViewType::non_const_value_type> val;
  parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);

  // All keys equal: already sorted. DefaultBinOp1D would otherwise divide by
  // max-min == 0 and derive bin indices from a non-finite scale factor.
  if(val.max == val.min) return;

  BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
  bin_sort.create_permute_vector();
  bin_sort.sort(view);
}
/*template<class ViewType, class Comparator>
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
}*/
}
#endif

View File

@ -1,92 +0,0 @@
# Makefile for the Kokkos algorithms unit tests.
# Locations of the Kokkos tree and of the gtest sources, relative to this directory.
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
# Let make find the test .cpp files in the algorithms/unit_tests directory.
vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
# First target in the file, hence the default goal: build every enabled test executable.
default: build_all
echo "End Build"
# Makefile.kokkos is expected to supply the KOKKOS_* variables used below
# (KOKKOS_INTERNAL_USE_*, KOKKOS_CPPFLAGS, KOKKOS_LIBS, ...) -- they are not set in this file.
include $(KOKKOS_PATH)/Makefile.kokkos
# Toolchain selection: a CUDA build forces nvcc_wrapper as compiler and linker;
# otherwise g++ is only a default that can be overridden.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
# Extra include paths for gtest and for the test headers.
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
# Filled in below: one executable (TARGETS) and one run target (TEST_TARGETS) per enabled backend.
TEST_TARGETS =
TARGETS =
# Per-backend object lists; each section is active only when its KOKKOS_INTERNAL_USE_* flag is 1.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Serial
TEST_TARGETS += test-serial
endif
# Link rules: one test executable per backend.
KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda
KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads
KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP
KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial
# Run targets: build (if needed) and execute the corresponding test binary.
test-cuda: KokkosAlgorithms_UnitTest_Cuda
./KokkosAlgorithms_UnitTest_Cuda
test-threads: KokkosAlgorithms_UnitTest_Threads
./KokkosAlgorithms_UnitTest_Threads
test-openmp: KokkosAlgorithms_UnitTest_OpenMP
./KokkosAlgorithms_UnitTest_OpenMP
test-serial: KokkosAlgorithms_UnitTest_Serial
./KokkosAlgorithms_UnitTest_Serial
# Aggregate targets over all enabled backends.
build_all: $(TARGETS)
test: $(TEST_TARGETS)
# Remove object files and test executables; kokkos-clean is not defined in this file
# (presumably provided by the included Makefile.kokkos).
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
# gtest is compiled from its single gtest-all.cc source file.
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,110 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdint.h>
#include <iostream>
#include <iomanip>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#ifdef KOKKOS_HAVE_CUDA
#include <TestRandom.hpp>
#include <TestSort.hpp>
namespace Test {
// gtest fixture for the CUDA tests: brings up the host execution space and
// the CUDA device before the tests and shuts them down afterwards.
class cuda : public ::testing::Test {
protected:
  // Configure std::cout for scientific output with precision 5, then
  // initialize the host execution space followed by CUDA on device 0.
  static void SetUpTestCase()
  {
    std::cout << std::scientific << std::setprecision(5);
    Kokkos::HostSpace::execution_space::initialize();
    const Kokkos::Cuda::SelectDevice device(0);
    Kokkos::Cuda::initialize( device );
  }

  // Finalize in the reverse order of initialization.
  static void TearDownTestCase()
  {
    Kokkos::Cuda::finalize();
    Kokkos::HostSpace::execution_space::finalize();
  }
};
void cuda_test_random_xorshift64( int num_draws )
{
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws);
}
void cuda_test_random_xorshift1024( int num_draws )
{
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws);
}
// Each helper macro below expands to one TEST_F case on the `cuda` fixture;
// the macro argument is forwarded as the problem size of that test.
#define CUDA_RANDOM_XORSHIFT64( num_draws ) \
TEST_F( cuda, Random_XorShift64 ) { \
cuda_test_random_xorshift64(num_draws); \
}
#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \
TEST_F( cuda, Random_XorShift1024 ) { \
cuda_test_random_xorshift1024(num_draws); \
}
#define CUDA_SORT_UNSIGNED( size ) \
TEST_F( cuda, SortUnsigned ) { \
Impl::test_sort< Kokkos::Cuda, unsigned >(size); \
}
// Instantiate the three test cases with their problem sizes.
CUDA_RANDOM_XORSHIFT64( 132141141 )
CUDA_RANDOM_XORSHIFT1024( 52428813 )
CUDA_SORT_UNSIGNED(171)
// The helper macros are only needed for the instantiations above.
#undef CUDA_RANDOM_XORSHIFT64
#undef CUDA_RANDOM_XORSHIFT1024
#undef CUDA_SORT_UNSIGNED
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */

View File

@ -1,102 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
// gtest fixture for the OpenMP tests: initializes Kokkos::OpenMP before the
// tests and finalizes it afterwards.
class openmp : public ::testing::Test {
protected:
  // Configure std::cout for scientific output with precision 5 and start
  // OpenMP. The thread count is omp_get_max_threads(), unless hwloc is
  // available, in which case it is (NUMA regions) x (cores per NUMA region).
  static void SetUpTestCase()
  {
    std::cout << std::scientific << std::setprecision(5);

    const unsigned omp_default = omp_get_max_threads();
    const bool have_hwloc = Kokkos::hwloc::available();
    const unsigned threads_count = have_hwloc
      ? Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa()
      : omp_default;

    Kokkos::OpenMP::initialize( threads_count );
  }

  static void TearDownTestCase()
  {
    Kokkos::OpenMP::finalize();
  }
};
// Each helper macro below expands to one TEST_F case on the `openmp` fixture;
// the macro argument is forwarded as the problem size of that test.
#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \
TEST_F( openmp, Random_XorShift64 ) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >(num_draws); \
}
#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \
TEST_F( openmp, Random_XorShift1024 ) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >(num_draws); \
}
#define OPENMP_SORT_UNSIGNED( size ) \
TEST_F( openmp, SortUnsigned ) { \
Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \
}
// Instantiate the three test cases with their problem sizes.
OPENMP_RANDOM_XORSHIFT64( 10240000 )
OPENMP_RANDOM_XORSHIFT1024( 10130144 )
OPENMP_SORT_UNSIGNED(171)
// The helper macros are only needed for the instantiations above.
#undef OPENMP_RANDOM_XORSHIFT64
#undef OPENMP_RANDOM_XORSHIFT1024
#undef OPENMP_SORT_UNSIGNED
#endif
} // namespace Test

View File

@ -1,476 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_DUALVIEW_HPP
#define KOKKOS_TEST_DUALVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <cmath>
namespace Test {
namespace Impl{
// This test runs the random number generators and uses some statistical tests to
// check the 'goodness' of the random numbers:
// (i) mean: the mean is expected to be 0.5*RAND_MAX
// (ii) variance: the variance is 1/3*mean*mean
// (iii) covariance: the covariance is 0
// (iv) 1-tupledistr: the mean, variance and covariance of a 1D histogram of random numbers
// (v) 3-tupledistr: the mean, variance and covariance of a 3D histogram of random numbers
#define HIST_DIM3D 24
#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D)
// Reduction value for the random-number tests: accumulated sums (count, mean,
// variance, covariance) plus the smallest and largest value seen.
struct RandomProperties {
  uint64_t count;
  double mean;
  double variance;
  double covariance;
  double min;
  double max;

  // Neutral element: all sums zero; min/max start at +/-1e64 so that the
  // first comparison replaces them.
  KOKKOS_INLINE_FUNCTION
  RandomProperties()
    : count(0), mean(0.0), variance(0.0), covariance(0.0),
      min(1e64), max(-1e64)
  {}

  // Combine with another partial result.
  KOKKOS_INLINE_FUNCTION
  RandomProperties& operator+=(const RandomProperties& add) {
    count      += add.count;
    mean       += add.mean;
    variance   += add.variance;
    covariance += add.covariance;
    if (add.min < min) { min = add.min; }
    if (add.max > max) { max = add.max; }
    return *this;
  }

  // Volatile overload of the combination.
  KOKKOS_INLINE_FUNCTION
  void operator+=(const volatile RandomProperties& add) volatile {
    count      += add.count;
    mean       += add.mean;
    variance   += add.variance;
    covariance += add.covariance;
    min = add.min<min?add.min:min;
    max = add.max>max?add.max:max;
  }
};
// Reduction functor that draws random numbers of type Scalar from a generator
// pool, accumulates their statistics and fills a 1-D and a 3-D histogram.
template<class GeneratorPool, class Scalar>
struct test_random_functor {
  typedef typename GeneratorPool::generator_type rnd_type;
  typedef RandomProperties value_type;
  typedef typename GeneratorPool::device_type device_type;

  GeneratorPool rand_pool;
  const double mean;   // expected mean: half of the generator's max() for Scalar

  // Kokkos::rand::max() is meant to be an exclusive upper bound of draw(),
  // but the float specialization of some implementations may reach it through
  // rounding. The histograms therefore carry one extra entry per dimension.
  typedef Kokkos::View<int[HIST_DIM1D+1],typename GeneratorPool::device_type> type_1d;
  type_1d density_1d;
  typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1],typename GeneratorPool::device_type> type_3d;
  type_3d density_3d;

  test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d)
    : rand_pool (rand_pool_),
      mean (0.5*Kokkos::rand<rnd_type,Scalar>::max ()),
      density_1d (d1d),
      density_3d (d3d)
  {}

  // One work item: 1024 rounds of three draws each.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i, RandomProperties& prop) const {
    typedef Kokkos::rand<rnd_type,Scalar> draw_type;

    const Scalar theMax = draw_type::max ();
    rnd_type rand_gen = rand_pool.get_state();

    for (int round = 0; round < 1024; ++round) {
      // Three consecutive draws from the same generator state.
      const Scalar first  = draw_type::draw(rand_gen);
      const Scalar second = draw_type::draw(rand_gen);
      const Scalar third  = draw_type::draw(rand_gen);

      const double dev1 = first  - mean;
      const double dev2 = second - mean;
      const double dev3 = third  - mean;

      prop.count += 3;

      prop.mean += first;
      prop.mean += second;
      prop.mean += third;

      prop.variance += dev1*dev1;
      prop.variance += dev2*dev2;
      prop.variance += dev3*dev3;

      // Covariance of neighbouring draws.
      prop.covariance += dev1*dev2;
      prop.covariance += dev2*dev3;

      // Histogram slots. A draw equal to max() lands in the extra last entry,
      // which may not be counted by the histogram checks; a generator that
      // only returns max() is still caught because no other slot gets filled.
      const uint64_t slot1_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * first / theMax);
      const uint64_t slot2_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * second / theMax);
      const uint64_t slot3_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * third / theMax);
      const uint64_t slot1_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * first / theMax);
      const uint64_t slot2_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * second / theMax);
      const uint64_t slot3_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * third / theMax);

      Kokkos::atomic_fetch_add (&density_1d(slot1_1d), 1);
      Kokkos::atomic_fetch_add (&density_1d(slot2_1d), 1);
      Kokkos::atomic_fetch_add (&density_1d(slot3_1d), 1);
      Kokkos::atomic_fetch_add (&density_3d(slot1_3d, slot2_3d, slot3_3d), 1);
    }

    rand_pool.free_state(rand_gen);
  }
};
// Reduction functor over the 1-D histogram: accumulates the sum of the slot
// counts, their squared deviation from the expected count, the product of
// deviations of neighbouring slots, and the smallest/largest count.
template<class DeviceType>
struct test_histogram1d_functor {
  typedef RandomProperties value_type;
  typedef typename DeviceType::execution_space execution_space;
  typedef typename DeviceType::memory_space memory_space;

  // The view has one entry more than HIST_DIM1D: Kokkos::rand::max() is meant
  // to be an exclusive upper bound, but the float specialization of some
  // implementations may reach it through rounding.
  typedef Kokkos::View<int[HIST_DIM1D+1], memory_space> type_1d;
  type_1d density_1d;
  double mean;   // expected count per slot

  test_histogram1d_functor (type_1d d1d, int num_draws)
    : density_1d (d1d),
      mean (1.0*num_draws/HIST_DIM1D*3)
  {
    printf ("Mean: %e\n", mean);
  }

  KOKKOS_INLINE_FUNCTION void
  operator() (const typename memory_space::size_type i,
              RandomProperties& prop) const
  {
    typedef typename memory_space::size_type size_type;

    const double count = density_1d(i);
    const double deviation = count - mean;

    prop.mean += count;
    prop.variance += 1.0 * deviation * deviation;

    if (count < prop.min) { prop.min = count; }
    if (count > prop.max) { prop.max = count; }

    // The last regular slot has no right-hand neighbour.
    const bool has_next = i < static_cast<size_type> (HIST_DIM1D-1);
    if (has_next) {
      prop.covariance += deviation * (density_1d(i+1) - mean);
    }
  }
};
// Reduction functor over the 3-D histogram, visited through a flat index:
// accumulates the sum of the cell counts, their squared deviation from the
// expected count and the product of deviations of consecutive cells.
template<class DeviceType>
struct test_histogram3d_functor {
  typedef RandomProperties value_type;
  typedef typename DeviceType::execution_space execution_space;
  typedef typename DeviceType::memory_space memory_space;

  // Each dimension has one entry more than HIST_DIM3D: Kokkos::rand::max() is
  // meant to be an exclusive upper bound, but the float specialization of
  // some implementations may reach it through rounding.
  typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1], memory_space> type_3d;
  type_3d density_3d;
  double mean;   // expected count per cell

  test_histogram3d_functor (type_3d d3d, int num_draws)
    : density_3d (d3d),
      mean (1.0*num_draws/HIST_DIM1D)
  {}

  KOKKOS_INLINE_FUNCTION void
  operator() (const typename memory_space::size_type i,
              RandomProperties& prop) const
  {
    typedef typename memory_space::size_type size_type;

    const size_type row   = HIST_DIM3D;
    const size_type plane = HIST_DIM3D*HIST_DIM3D;

    // Split the flat index into its three coordinates.
    const double count = density_3d(i/plane, (i % plane)/row, i % row);
    const double deviation = count - mean;

    prop.mean += count;
    prop.variance += deviation * deviation;

    // The last cell has no successor.
    if (i < static_cast<size_type> (HIST_DIM1D-1)) {
      const size_type next = i+1;
      const double count_next = density_3d(next/plane, (next % plane)/row, next % row);
      prop.covariance += deviation * (count_next - mean);
    }
  }
};
//
// Templated test that uses the above functors.
//
// Runs the three statistical checks for one Scalar type and records the
// outcome of each as a 0/1 flag in the pass_* members:
//   1. mean / variance / covariance of the raw draws,
//   2. mean / variance / covariance of the 1-D histogram,
//   3. mean / variance / covariance of the 3-D histogram.
// All work happens in the constructor; the eps values and tolerances are
// printed to std::cerr.
template <class RandomGenerator,class Scalar>
struct test_random_scalar {
typedef typename RandomGenerator::generator_type rnd_type;
// Result flags (1 = within tolerance, 0 = outside).
int pass_mean,pass_var,pass_covar;
int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar;
int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar;
// density_1d / density_3d: histograms filled by the draw kernel (not cleared here)
// pool:                    generator pool the draws are taken from
// num_draws:               problem size; the draw kernel runs num_draws/1024 work items
test_random_scalar (typename test_random_functor<RandomGenerator,int>::type_1d& density_1d,
typename test_random_functor<RandomGenerator,int>::type_3d& density_3d,
RandomGenerator& pool,
unsigned int num_draws)
{
using std::cerr;
using std::endl;
using Kokkos::parallel_reduce;
{
cerr << " -- Testing randomness properties" << endl;
RandomProperties result;
typedef test_random_functor<RandomGenerator, Scalar> functor_type;
// Each work item performs 1024 rounds of three draws, hence the divisions by 3 below.
parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result);
//printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2);
double tolerance = 2.0*sqrt(1.0/num_draws);
double mean_expect = 0.5*Kokkos::rand<rnd_type,Scalar>::max();
double variance_expect = 1.0/3.0*mean_expect*mean_expect;
// Relative deviations: expected over measured, minus one.
double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0;
double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0;
// Covariance is expected to be zero, so it is scaled by the expected variance instead.
double covariance_eps = result.covariance/num_draws/2/variance_expect;
pass_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_var = ((-tolerance < variance_eps) &&
( tolerance > variance_eps)) ? 1:0;
// The covariance check uses a tolerance widened by a factor of 1.4.
pass_covar = ((-1.4*tolerance < covariance_eps) &&
( 1.4*tolerance > covariance_eps)) ? 1:0;
cerr << "Pass: " << pass_mean
<< " " << pass_var
<< " " << mean_eps
<< " " << variance_eps
<< " " << covariance_eps
<< " || " << tolerance << endl;
}
{
cerr << " -- Testing 1-D histogram" << endl;
RandomProperties result;
typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type;
// Reduce over the HIST_DIM1D regular slots (the extra last entry is not visited).
parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result);
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
// Every draw is entered into the 1-D histogram, three per round, hence the factor 3.
double mean_expect = 1.0*num_draws*3/HIST_DIM1D;
double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D;
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
pass_hist1d_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_hist1d_var = ((-tolerance < variance_eps) &&
( tolerance > variance_eps)) ? 1:0;
pass_hist1d_covar = ((-tolerance < covariance_eps) &&
( tolerance > covariance_eps)) ? 1:0;
cerr << "Density 1D: " << mean_eps
<< " " << variance_eps
<< " " << (result.covariance/HIST_DIM1D/HIST_DIM1D)
<< " || " << tolerance
<< " " << result.min
<< " " << result.max
<< " || " << result.variance/HIST_DIM1D
<< " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D)
<< " || " << result.covariance/HIST_DIM1D
<< " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D
<< endl;
}
{
cerr << " -- Testing 3-D histogram" << endl;
RandomProperties result;
typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type;
// Reduce over the HIST_DIM1D (= HIST_DIM3D^3) regular cells through a flat index.
parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result);
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
// One 3-D histogram entry per round of three draws, so no factor 3 here.
double mean_expect = 1.0*num_draws/HIST_DIM1D;
double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D;
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
pass_hist3d_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_hist3d_var = ((-tolerance < variance_eps) &&
( tolerance > variance_eps)) ? 1:0;
pass_hist3d_covar = ((-tolerance < covariance_eps) &&
( tolerance > covariance_eps)) ? 1:0;
// NOTE(review): the 3-D functor never updates result.min / result.max, so the
// two values printed last are the constructor defaults.
cerr << "Density 3D: " << mean_eps
<< " " << variance_eps
<< " " << result.covariance/HIST_DIM1D/HIST_DIM1D
<< " || " << tolerance
<< " " << result.min
<< " " << result.max << endl;
}
}
};
// Entry point of the random-number test for one generator pool type.
// Runs test_random_scalar for int, unsigned int, int64_t, uint64_t, float and
// double in turn, using a single pool seeded with 31891 and the same two
// histogram views, and asserts that every pass flag equals 1.
// The histograms are reset to zero between two scalar types.
template <class RandomGenerator>
void test_random(unsigned int num_draws)
{
using std::cerr;
using std::endl;
// Histograms shared by all scalar types.
typename test_random_functor<RandomGenerator,int>::type_1d density_1d("D1d");
typename test_random_functor<RandomGenerator,int>::type_3d density_3d("D3d");
cerr << "Test Scalar=int" << endl;
RandomGenerator pool(31891);
test_random_scalar<RandomGenerator,int> test_int(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_int.pass_mean,1);
ASSERT_EQ( test_int.pass_var,1);
ASSERT_EQ( test_int.pass_covar,1);
ASSERT_EQ( test_int.pass_hist1d_mean,1);
ASSERT_EQ( test_int.pass_hist1d_var,1);
ASSERT_EQ( test_int.pass_hist1d_covar,1);
ASSERT_EQ( test_int.pass_hist3d_mean,1);
ASSERT_EQ( test_int.pass_hist3d_var,1);
ASSERT_EQ( test_int.pass_hist3d_covar,1);
// Clear the histograms before the next scalar type.
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=unsigned int" << endl;
test_random_scalar<RandomGenerator,unsigned int> test_uint(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_uint.pass_mean,1);
ASSERT_EQ( test_uint.pass_var,1);
ASSERT_EQ( test_uint.pass_covar,1);
ASSERT_EQ( test_uint.pass_hist1d_mean,1);
ASSERT_EQ( test_uint.pass_hist1d_var,1);
ASSERT_EQ( test_uint.pass_hist1d_covar,1);
ASSERT_EQ( test_uint.pass_hist3d_mean,1);
ASSERT_EQ( test_uint.pass_hist3d_var,1);
ASSERT_EQ( test_uint.pass_hist3d_covar,1);
// Clear the histograms before the next scalar type.
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=int64_t" << endl;
test_random_scalar<RandomGenerator,int64_t> test_int64(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_int64.pass_mean,1);
ASSERT_EQ( test_int64.pass_var,1);
ASSERT_EQ( test_int64.pass_covar,1);
ASSERT_EQ( test_int64.pass_hist1d_mean,1);
ASSERT_EQ( test_int64.pass_hist1d_var,1);
ASSERT_EQ( test_int64.pass_hist1d_covar,1);
ASSERT_EQ( test_int64.pass_hist3d_mean,1);
ASSERT_EQ( test_int64.pass_hist3d_var,1);
ASSERT_EQ( test_int64.pass_hist3d_covar,1);
// Clear the histograms before the next scalar type.
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=uint64_t" << endl;
test_random_scalar<RandomGenerator,uint64_t> test_uint64(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_uint64.pass_mean,1);
ASSERT_EQ( test_uint64.pass_var,1);
ASSERT_EQ( test_uint64.pass_covar,1);
ASSERT_EQ( test_uint64.pass_hist1d_mean,1);
ASSERT_EQ( test_uint64.pass_hist1d_var,1);
ASSERT_EQ( test_uint64.pass_hist1d_covar,1);
ASSERT_EQ( test_uint64.pass_hist3d_mean,1);
ASSERT_EQ( test_uint64.pass_hist3d_var,1);
ASSERT_EQ( test_uint64.pass_hist3d_covar,1);
// Clear the histograms before the next scalar type.
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=float" << endl;
test_random_scalar<RandomGenerator,float> test_float(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_float.pass_mean,1);
ASSERT_EQ( test_float.pass_var,1);
ASSERT_EQ( test_float.pass_covar,1);
ASSERT_EQ( test_float.pass_hist1d_mean,1);
ASSERT_EQ( test_float.pass_hist1d_var,1);
ASSERT_EQ( test_float.pass_hist1d_covar,1);
ASSERT_EQ( test_float.pass_hist3d_mean,1);
ASSERT_EQ( test_float.pass_hist3d_var,1);
ASSERT_EQ( test_float.pass_hist3d_covar,1);
// Clear the histograms before the last scalar type.
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=double" << endl;
test_random_scalar<RandomGenerator,double> test_double(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_double.pass_mean,1);
ASSERT_EQ( test_double.pass_var,1);
ASSERT_EQ( test_double.pass_covar,1);
ASSERT_EQ( test_double.pass_hist1d_mean,1);
ASSERT_EQ( test_double.pass_hist1d_var,1);
ASSERT_EQ( test_double.pass_hist1d_covar,1);
ASSERT_EQ( test_double.pass_hist3d_mean,1);
ASSERT_EQ( test_double.pass_hist3d_var,1);
ASSERT_EQ( test_double.pass_hist3d_covar,1);
}
}
} // namespace Test
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP

View File

@ -1,99 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
namespace Test {
#ifdef KOKKOS_HAVE_SERIAL
// Google Test fixture for the "serial" test case: brings the Kokkos::Serial
// backend up once before the first test and shuts it down after the last one.
class serial : public ::testing::Test {
protected:
  // Configures std::cout for 5-digit scientific output, then starts Kokkos::Serial.
  static void SetUpTestCase()
  {
    std::cout.precision(5);
    std::cout << std::scientific;
    Kokkos::Serial::initialize();
  }

  // Shuts the Kokkos::Serial backend down again.
  static void TearDownTestCase()
  {
    Kokkos::Serial::finalize();
  }
};
// Random-number statistics test for the XorShift64 pool on Kokkos::Serial,
// using 10240000 draws.
TEST_F( serial, Random_XorShift64 ) {
  Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Serial> >(10240000);
}

// Same test for the XorShift1024 pool, using 10130144 draws.
TEST_F( serial, Random_XorShift1024 ) {
  Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Serial> >(10130144);
}

// Sort test with unsigned keys; the argument is the edge length N handed to
// Impl::test_sort.
TEST_F( serial, SortUnsigned ) {
  Impl::test_sort< Kokkos::Serial, unsigned >(171);
}
#endif // KOKKOS_HAVE_SERIAL
} // namespace Test

View File

@ -1,206 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef TESTSORT_HPP_
#define TESTSORT_HPP_
#include <gtest/gtest.h>
#include<Kokkos_Core.hpp>
#include<Kokkos_Random.hpp>
#include<Kokkos_Sort.hpp>
namespace Test {
namespace Impl{
// Reduction functor that counts adjacent pairs (i, i+1) of a 1D key view that
// are out of ascending order. Because element i+1 is read, the reduction range
// must stop one short of the view length.
template<class ExecutionSpace, class Scalar>
struct is_sorted_struct {
  typedef unsigned int value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*,ExecutionSpace> keys;

  is_sorted_struct(Kokkos::View<Scalar*,ExecutionSpace> keys_)
    : keys(keys_)
  {}

  // Adds one to `count` when keys(i) is strictly greater than its successor.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i, unsigned int& count) const {
    const bool out_of_order = keys(i) > keys(i+1);
    if (out_of_order) {
      ++count;
    }
  }
};
// Reduction functor that accumulates every entry of a 1D key view into a
// double. The sort tests use it as a checksum before and after sorting.
template<class ExecutionSpace, class Scalar>
struct sum {
  typedef double value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*,ExecutionSpace> keys;

  sum(Kokkos::View<Scalar*,ExecutionSpace> keys_)
    : keys(keys_)
  {}

  // Adds keys(i) to the running total.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i, double& count) const {
    count = count + keys(i);
  }
};
// Reduction functor that counts adjacent rows (i, i+1) of an N x 3 key view
// whose 3D bin indices are out of lexicographic (x, then y, then z) order.
//
// Each coordinate is mapped to a bin with
//     bin = int( (key - min) / (max - min) * max_bins )
// i.e. the offset from `min` is normalised by the width of the interval.
// The previous code divided by `max` alone, which is only correct when
// min == 0 (the only case the current caller uses, so results there are
// unchanged).
//
// NOTE(review): the arithmetic is carried out in Scalar, so for an integral
// Scalar the division truncates before the multiplication - confirm this is
// what integral key types are expected to see.
//
// Because row i+1 is read, the reduction range must stop one short of the
// number of rows.
template<class ExecutionSpace, class Scalar>
struct bin3d_is_sorted_struct {
  typedef unsigned int value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*[3],ExecutionSpace> keys;
  int max_bins;   // number of bins per dimension
  Scalar min;     // lower bound of the key range (same for all dimensions)
  Scalar max;     // upper bound of the key range (same for all dimensions)

  bin3d_is_sorted_struct(Kokkos::View<Scalar*[3],ExecutionSpace> keys_,int max_bins_,Scalar min_,Scalar max_):
    keys(keys_),max_bins(max_bins_),min(min_),max(max_) {
  }

  // Maps one coordinate value to its bin index along a single dimension.
  KOKKOS_INLINE_FUNCTION
  int bin_of(const Scalar key) const {
    return int ((key-min)/(max-min) * max_bins);
  }

  // Adds one to `count` when the bin triple of row i sorts after that of row i+1.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i, unsigned int& count) const {
    int ix1 = bin_of(keys(i,0));
    int iy1 = bin_of(keys(i,1));
    int iz1 = bin_of(keys(i,2));
    int ix2 = bin_of(keys(i+1,0));
    int iy2 = bin_of(keys(i+1,1));
    int iz2 = bin_of(keys(i+1,2));

    if (ix1>ix2) count++;
    else if(ix1==ix2) {
      if (iy1>iy2) count++;
      else if ((iy1==iy2) && (iz1>iz2)) count++;
    }
  }
};
// Reduction functor that accumulates all three components of every row of an
// N x 3 key view into a double (checksum for the 3D sort test).
template<class ExecutionSpace, class Scalar>
struct sum3D {
  typedef double value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*[3],ExecutionSpace> keys;

  sum3D(Kokkos::View<Scalar*[3],ExecutionSpace> keys_)
    : keys(keys_)
  {}

  // Adds keys(i,0), keys(i,1) and keys(i,2), in that order, to the total.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i, double& count) const {
    for (int dim = 0; dim < 3; ++dim) {
      count += keys(i,dim);
    }
  }
};
template<class ExecutionSpace, typename KeyType>
void test_1D_sort(unsigned int n,bool force_kokkos) {
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
KeyViewType keys("Keys",n);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_before);
Kokkos::sort(keys,force_kokkos);
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys),sort_fails);
double ratio = sum_before/sum_after;
double epsilon = 1e-10;
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
ASSERT_EQ(sort_fails,0);
ASSERT_EQ(equal_sum,1);
}
template<class ExecutionSpace, typename KeyType>
void test_3D_sort(unsigned int n) {
typedef Kokkos::View<KeyType*[3],ExecutionSpace > KeyViewType;
KeyViewType keys("Keys",n*n*n);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys,g,100.0);
double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
int bin_1d = 1;
while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2;
int bin_max[3] = {bin_1d,bin_1d,bin_1d};
typename KeyViewType::value_type min[3] = {0,0,0};
typename KeyViewType::value_type max[3] = {100,100,100};
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
BinOp bin_op(bin_max,min,max);
Kokkos::BinSort< KeyViewType , BinOp >
Sorter(keys,bin_op,false);
Sorter.create_permute_vector();
Sorter.template sort< KeyViewType >(keys);
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
double ratio = sum_before/sum_after;
double epsilon = 1e-10;
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails);
ASSERT_EQ(sort_fails,0);
ASSERT_EQ(equal_sum,1);
}
// Runs the complete sort test suite for one execution space and key type:
// the 1D sort on N*N*N keys with force_kokkos set to true and then to false,
// followed by the 3D bin sort, which is given N itself.
template<class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N)
{
  const unsigned int num_keys_1d = N*N*N;

  test_1D_sort<ExecutionSpace,KeyType>(num_keys_1d, true);
  test_1D_sort<ExecutionSpace,KeyType>(num_keys_1d, false);
  test_3D_sort<ExecutionSpace,KeyType>(N);
}
}
}
#endif /* TESTSORT_HPP_ */

View File

@ -1,113 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
namespace Test {
#ifdef KOKKOS_HAVE_PTHREAD
// Google Test fixture for the "threads" test case: starts the Kokkos::Threads
// backend once before the first test and finalizes it after the last one.
class threads : public ::testing::Test {
protected:
  // Configures std::cout for 5-digit scientific output, picks a thread count,
  // prints it, and initializes Kokkos::Threads with it.
  static void SetUpTestCase()
  {
    std::cout.precision(5);
    std::cout << std::scientific;

    // With hwloc: NUMA regions times cores per NUMA region. The
    // threads-per-core factor (get_available_threads_per_core) is not applied.
    // Without hwloc: fall back to 4 threads.
    const unsigned fallback_threads = 4;
    const unsigned num_threads = Kokkos::hwloc::available()
      ? Kokkos::hwloc::get_available_numa_count()
        * Kokkos::hwloc::get_available_cores_per_numa()
      : fallback_threads;

    std::cout << "Threads: " << num_threads << std::endl;

    Kokkos::Threads::initialize( num_threads );
  }

  // Shuts the Kokkos::Threads backend down again.
  static void TearDownTestCase()
  {
    Kokkos::Threads::finalize();
  }
};
// Random-number statistics test for the XorShift64 pool on Kokkos::Threads,
// using 10240000 draws.
TEST_F( threads, Random_XorShift64 ) {
  Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Threads> >(10240000);
}

// Same test for the XorShift1024 pool, using 10130144 draws.
TEST_F( threads, Random_XorShift1024 ) {
  Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Threads> >(10130144);
}

// Sort test; the argument is the edge length N handed to Impl::test_sort.
// NOTE(review): the test is named SortUnsigned but instantiates test_sort with
// double keys (the serial counterpart uses unsigned) - confirm which is intended.
TEST_F( threads, SortUnsigned ) {
  Impl::test_sort< Kokkos::Threads, double >(171);
}
#endif
} // namespace Test

View File

@ -1,50 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
// Test-driver entry point: lets Google Test consume its command-line flags,
// then runs every registered test and returns the result as the exit status.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int status = RUN_ALL_TESTS();
  return status;
}

View File

@ -1,185 +0,0 @@
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building Trilinos with CUDA enabled. The script
# remedies some differences between the interface of NVCC and that of
# the host compiler, in particular for linking. It also means that
# Trilinos doesn't need separate .cu files; it can just use .cpp
# files.
#
# Hopefully, at some point, NVIDIA may fix NVCC so as to make this
# script obsolete. For now, this script exists and if you want to
# build Trilinos with CUDA enabled, you must use this script as your
# compiler.
# Default settings: change these according to your machine. For
# example, you may have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overridden
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
# Architecture passed as -arch=<value> when the caller gives no -arch option.
default_arch="sm_35"
#default_arch="sm_50"

#
# The default C++ compiler.
# Taken from $NVCC_WRAPPER_DEFAULT_COMPILER when that is set and non-empty,
# otherwise g++. Used as -ccbin when the caller gives no -ccbin option.
#
default_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
#default_compiler="icpc"
#default_compiler="/usr/local/gcc/4.8.3/bin/g++"
#default_compiler="/usr/local/gcc/4.9.1/bin/g++"

#
# Internal variables
#
# Every accumulator is reset here so that nothing leaks in from the calling
# environment. The nvcc argument list is named cuda_args everywhere else in
# this script; it used to be initialised under the misspelt name cuda_arg,
# which left the real accumulator uninitialised.
cpp_files=""              # source files to compile as CUDA
xcompiler_args=""         # options forwarded to the host compiler (-Xcompiler a,b,...)
cuda_args=""              # options understood by nvcc itself
xlinker_args=""           # options forwarded to the linker (-Xlinker ...)
object_files=""           # objects/libraries, form used when only linking
object_files_xlinker=""   # objects/libraries, form used when also compiling
first_host_option=1       # 1 until the first host-compiler option has been seen
arch_set=0                # 1 once the caller supplied -arch
ccbin_set=0               # 1 once the caller supplied -ccbin
nvcc_error_code=0         # exit status reported by the final command
dry_run=0                 # 1 when --show was given: print the command, do not run it
replace_pragma_ident=0    # 1 when --replace-pragma-ident was given
#echo "Arguments: $# $@"
# Walk the command line once and sort every argument into one of the
# accumulators: nvcc options (cuda_args), host-compiler options
# (xcompiler_args, a single comma-separated -Xcompiler list), linker options
# (xlinker_args), source files (cpp_files) or objects/libraries (object_files*).
# The order of the case patterns matters: the first matching pattern wins.
while [ $# -gt 0 ]
do
  case $1 in
  #show the executed command
  --show)
    dry_run=1
    ;;
  #replace '#pragma ident' with '#ident'; this is needed to compile OpenMPI due to a configure script bug and a non-standardized behaviour of pragma with macros
  --replace-pragma-ident)
    replace_pragma_ident=1
    ;;
  #handle source files to be compiled as cuda files
  *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
    cpp_files="$cpp_files $1"
    ;;
  #Handle known nvcc args
  -O*|-D*|-gencode*|-c|-I*|-L*|-l*|-g|--help|--version|--dryrun|--verbose|--keep-dir|-E|-M|-G|--relocatable-device-code*|-shared|-lineinfo|-expt-extended-lambda|--resource-usage)
    cuda_args="$cuda_args $1"
    ;;
  #Handle c++11 setting
  --std=c++11|-std=c++11)
    cuda_args="$cuda_args $1"
    ;;
  #strip off -std=c++98 due to nvcc warnings and because Tribits will place both -std=c++11 and -std=c++98
  -std=c++98|--std=c++98)
    ;;
  #Handle known nvcc args that have an argument
  -o|-rdc|-maxrregcount|--default-stream)
    cuda_args="$cuda_args $1 $2"
    shift
    ;;
  #strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
  -pedantic|-Wpedantic|-ansi)
    ;;
  #strip -Xcompiler because we add it
  -Xcompiler)
    ;;
  #strip off "-x cu" because we add that; any other "-x <lang>" goes to the host compiler
  -x)
    if [[ $2 != "cu" ]]; then
      # Same first/subsequent handling as the catch-all branch below: the
      # first host option must open the list with -Xcompiler, otherwise the
      # option would be emitted without the prefix and then overwritten by
      # the next host option.
      if [ $first_host_option -eq 0 ]; then
        xcompiler_args="$xcompiler_args,-x,$2"
      else
        xcompiler_args="-Xcompiler -x,$2"
        first_host_option=0
      fi
    fi
    shift
    ;;
  #Handle -ccbin (if it's not set we can set it to a default value)
  -ccbin)
    cuda_args="$cuda_args $1 $2"
    ccbin_set=1
    shift
    ;;
  #Handle -arch argument (if it's not set use a default)
  -arch*)
    cuda_args="$cuda_args $1"
    arch_set=1
    ;;
  #Handle -Xcudafe argument
  -Xcudafe)
    cuda_args="$cuda_args -Xcudafe $2"
    shift
    ;;
  #Handle args that should be sent to the linker; the substring expansion drops the first four characters ("-Wl,")
  -Wl*)
    xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
    ;;
  #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
  *.a|*.so|*.o|*.obj)
    object_files="$object_files $1"
    object_files_xlinker="$object_files_xlinker -Xlinker $1"
    ;;
  #Handle versioned shared libraries and dylibs: these are always passed through -Xlinker
  *.so.*|*.dylib)
    object_files_xlinker="$object_files_xlinker -Xlinker $1"
    object_files="$object_files -Xlinker $1"
    ;;
  #All other args are sent to the host compiler
  *)
    if [ $first_host_option -eq 0 ]; then
      xcompiler_args="$xcompiler_args,$1"
    else
      xcompiler_args="-Xcompiler $1"
      first_host_option=0
    fi
    ;;
  esac
  shift
done
# No -ccbin on the command line: fall back to the default host compiler
[ $ccbin_set -ne 1 ] && cuda_args="$cuda_args -ccbin $default_compiler"

# No -arch on the command line: fall back to the default architecture
[ $arch_set -ne 1 ] && cuda_args="$cuda_args -arch=$default_arch"

# Base command: nvcc options first, then linker and host-compiler options
command="nvcc $cuda_args $xlinker_args $xcompiler_args"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
# With --replace-pragma-ident, every source file that contains such a pragma is
# copied to /tmp with the pragma rewritten, and the copy is compiled instead.
if [ $replace_pragma_ident -eq 1 ]; then
  cpp_files2=""
  for file in $cpp_files
  do
    # Non-empty only when some line mentions "pragma", "ident" and "#".
    var=`grep pragma ${file} | grep ident | grep "#"`
    if [ "${#var}" -gt 0 ]
    then
      # Flatten directory separators so that a source given with a path
      # (e.g. src/foo.cpp) still maps to a file directly inside /tmp; using
      # the path verbatim would point into a /tmp subdirectory that does not
      # exist. Names without a slash are unchanged.
      tmp_file="/tmp/nvcc_wrapper_tmp_${file//\//_}"
      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > "$tmp_file"
      cpp_files2="$cpp_files2 $tmp_file"
    else
      cpp_files2="$cpp_files2 $file"
    fi
  done
  cpp_files=$cpp_files2
  echo $cpp_files
fi
# Link-only invocations take the plain object list; as soon as sources are
# present everything is compiled with "-x cu", so objects go through -Xlinker.
if [ -z "$cpp_files" ]; then
  command="$command $object_files"
else
  command="$command $object_files_xlinker -x cu $cpp_files"
fi

# --show: print the command instead of executing it
if [ $dry_run -eq 1 ]; then
  echo $command
  exit 0
fi

# Execute the command and hand its exit status back to the caller
$command
exit $?

View File

@ -1,81 +0,0 @@
# Builds and runs the Kokkos containers performance tests, one executable per
# enabled backend (Cuda, Threads, OpenMP). Which backends are enabled is decided
# by the KOKKOS_INTERNAL_USE_* variables tested below.

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

# Test sources live next to this Makefile in the Kokkos tree.
vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests

# Declared before the include so that it is the default goal.
default: build_all
	echo "End Build"

include $(KOKKOS_PATH)/Makefile.kokkos

# CUDA builds must go through nvcc_wrapper; otherwise honour the caller's
# compiler and fall back to g++.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = nvcc_wrapper
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests

TEST_TARGETS =
TARGETS =

# Each enabled backend contributes one executable and one test-<backend> target.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o
  TARGETS += KokkosContainers_PerformanceTest_Cuda
  TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
  TARGETS += KokkosContainers_PerformanceTest_Threads
  TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o
  TARGETS += KokkosContainers_PerformanceTest_OpenMP
  TEST_TARGETS += test-openmp
endif

# None of these names is a file; mark them phony so a stray file called
# "test", "clean", ... can never make the target look up to date.
.PHONY: default build_all test clean test-cuda test-threads test-openmp

KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda

KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads

KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP

test-cuda: KokkosContainers_PerformanceTest_Cuda
	./KokkosContainers_PerformanceTest_Cuda

test-threads: KokkosContainers_PerformanceTest_Threads
	./KokkosContainers_PerformanceTest_Threads

test-openmp: KokkosContainers_PerformanceTest_OpenMP
	./KokkosContainers_PerformanceTest_OpenMP

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

# Google Test is compiled from its single amalgamated source file.
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,100 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdint.h>
#include <string>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <fstream>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <Kokkos_UnorderedMap.hpp>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
namespace Performance {
// Google Test fixture for the "cuda" performance tests: initializes the host
// execution space and then Kokkos::Cuda once before the first test, and
// finalizes them in the reverse order after the last one.
class cuda : public ::testing::Test {
protected:
  // Configures std::cout for 5-digit scientific output and brings up the
  // host backend followed by CUDA device 0.
  static void SetUpTestCase()
  {
    std::cout.precision(5);
    std::cout << std::scientific;

    Kokkos::HostSpace::execution_space::initialize();

    const Kokkos::Cuda::SelectDevice device(0);
    Kokkos::Cuda::initialize( device );
  }

  // Tears the backends down: CUDA first, then the host execution space.
  static void TearDownTestCase()
  {
    Kokkos::Cuda::finalize();
    Kokkos::HostSpace::execution_space::finalize();
  }
};
// Runs the global-to-local id benchmark on Kokkos::Cuda for a geometric sweep
// of problem sizes: from begin_id_size up to and including end_id_size,
// multiplying by id_step each round. Prints a CSV-style header first; each
// benchmark call prints its own row.
TEST_F( cuda, global_2_local)
{
  std::cout << "Cuda" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::Cuda>(num_ids);
    num_ids *= Performance::id_step;
  }
}
// Runs Perf::run_performance_tests on Kokkos::Cuda with the boolean template
// argument set to true and the label "cuda-near".
// NOTE(review): the flag presumably selects the "near" key pattern - confirm
// in TestUnorderedMapPerformance.hpp.
TEST_F( cuda, unordered_map_performance_near)
{
Perf::run_performance_tests<Kokkos::Cuda,true>("cuda-near");
}
// Same run with the boolean template argument set to false and the label
// "cuda-far".
TEST_F( cuda, unordered_map_performance_far)
{
Perf::run_performance_tests<Kokkos::Cuda,false>("cuda-far");
}
}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */

View File

@ -1,231 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <vector>
#include <algorithm>
#include <impl/Kokkos_Timer.hpp>
// This test will simulate global ids
namespace Performance {
// Problem-size sweep for the global-to-local id benchmark: the driver starts
// at begin_id_size ids, multiplies the count by id_step after each run, and
// stops once the count exceeds end_id_size (1u << 22 = 4194304).
static const unsigned begin_id_size = 256u;
static const unsigned end_id_size = 1u << 22;
static const unsigned id_step = 2u;
// Overlays a 32-bit word with an array of its four bytes, so that the
// individual bytes of a 32-bit value can be addressed.
union helper
{
// The value viewed as one 32-bit unsigned integer.
uint32_t word;
// The same storage viewed byte by byte; which index holds the least
// significant byte depends on the platform's byte order.
uint8_t byte[4];
};
// Functor that fills a view with one scrambled 32-bit "global id" per local
// index. Constructing the functor immediately launches a parallel_for over the
// whole view.
//
// The id for index i is the bitwise complement of i with its upper and lower
// 16-bit halves exchanged. Both steps are bijections on 32-bit values, so
// distinct indices always yield distinct ids.
template <typename Device>
struct generate_ids
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;

  // Output: local_2_global[i] receives the global id generated for index i.
  local_id_view local_2_global;

  // ids: the view to fill; every entry is overwritten.
  generate_ids( local_id_view & ids)
    : local_2_global(ids)
  {
    Kokkos::parallel_for(local_2_global.dimension_0(), *this);
  }

  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i) const
  {
    // Exchanging bytes 0<->2 and 1<->3 of a 32-bit word is the same as
    // rotating it by 16 bits, whatever the byte order of the platform.
    // Doing it with shifts gives the same ids as the earlier union-based
    // byte shuffle without reading an inactive union member.
    const uint32_t inverted = ~static_cast<uint32_t>(i);
    local_2_global[i] = (inverted << 16) | (inverted >> 16);
  }
};
// Functor that inserts every (global id -> local index) pair into an
// UnorderedMap. Constructing the functor immediately launches a parallel_for
// over all local indices. The result of each insert is not inspected.
template <typename Device>
struct fill_map
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;

  global_id_view global_2_local;   // map being filled: global id -> local index
  local_id_view local_2_global;    // read-only table: local index -> global id

  fill_map( global_id_view gIds, local_id_view lIds)
    : global_2_local(gIds)
    , local_2_global(lIds)
  {
    Kokkos::parallel_for(local_2_global.dimension_0(), *this);
  }

  // Inserts the pair for local index i.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i) const
  {
    const uint32_t global_id = local_2_global[i];
    global_2_local.insert( global_id, i);
  }
};
// Reduction functor that looks every global id up in the map and counts the
// lookups whose stored value differs from the expected local index.
// Constructing the functor immediately launches the parallel_reduce and
// writes the mismatch count to num_errors.
template <typename Device>
struct find_test
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
  typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view;

  global_id_view global_2_local;   // read-only map: global id -> local index
  local_id_view local_2_global;    // read-only table: local index -> global id

  typedef size_t value_type;

  find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
    : global_2_local(gIds)
    , local_2_global(lIds)
  {
    Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors);
  }

  // Reduction identity: no errors seen yet.
  KOKKOS_INLINE_FUNCTION
  void init(value_type & v) const
  {
    v = 0;
  }

  // Combines two partial error counts.
  KOKKOS_INLINE_FUNCTION
  void join(volatile value_type & dst, volatile value_type const & src) const
  {
    dst = dst + src;
  }

  // Looks up the global id of local index i and records a mismatch.
  // NOTE(review): the index returned by find() is used without checking that
  // the lookup succeeded - confirm value_at() is safe for a failed lookup.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i, value_type & num_errors) const
  {
    const uint32_t index = global_2_local.find( local_2_global[i] );
    const bool mismatch = global_2_local.value_at(index) != i;
    if (mismatch) {
      ++num_errors;
    }
  }
};
// Benchmarks the global-to-local id workflow for num_ids ids on Device and
// prints one comma-separated row to std::cout:
//     <num_ids>, <create s>, <generate s>, <fill s>, <find s>
// The four times cover allocating the view and map, generating the ids,
// filling the map, and 100 rounds of looking every id up again. Finally
// asserts that the last lookup round found no mismatches.
template <typename Device>
void test_global_to_local_ids(unsigned num_ids)
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;

  // column 1: size
  std::cout << num_ids << ", ";

  double elapsed_seconds = 0;
  Kokkos::Impl::Timer timer;

  // The map is created with capacity for 1.5x the number of ids.
  local_id_view local_2_global("local_ids", num_ids);
  global_id_view global_2_local((3u*num_ids)/2u);

  // column 2: create
  elapsed_seconds = timer.seconds();
  std::cout << elapsed_seconds << ", ";
  timer.reset();

  // Generate the unique ids; the functor's constructor runs the kernel.
  {
    generate_ids<Device> gen(local_2_global);
  }
  Device::fence();

  // column 3: generate
  elapsed_seconds = timer.seconds();
  std::cout << elapsed_seconds << ", ";
  timer.reset();

  // Insert every id into the map.
  {
    fill_map<Device> fill(global_2_local, local_2_global);
  }
  Device::fence();

  // column 4: fill
  elapsed_seconds = timer.seconds();
  std::cout << elapsed_seconds << ", ";
  timer.reset();

  // Look every id up again, 100 times over; num_errors holds the count of
  // the most recent round.
  size_t num_errors = 0;
  for (int round = 0; round < 100; ++round)
  {
    find_test<Device> find(global_2_local, local_2_global,num_errors);
  }
  Device::fence();

  // column 5: find
  elapsed_seconds = timer.seconds();
  std::cout << elapsed_seconds << std::endl;

  ASSERT_EQ( num_errors, 0u);
}
} // namespace Performance
#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP

View File

@ -1,50 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
// Entry point of the performance test executable: passes the command line to
// Google Test and returns the result of running all registered tests.
int main(int argc, char *argv[]) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}

View File

@ -1,131 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
#include <iomanip>
#include <sstream>
#include <string>
#include <fstream>
namespace Performance {
// Test fixture that initializes the Kokkos OpenMP backend once for the whole
// test case and finalizes it afterwards.
class openmp : public ::testing::Test {
protected:
  // Configures std::cout formatting and starts Kokkos::OpenMP. The thread
  // count defaults to 4 and is replaced by numa * cores-per-numa *
  // threads-per-core when hwloc reports itself as available.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5) << std::scientific;

    unsigned thread_total = 4;
    if (Kokkos::hwloc::available()) {
      thread_total  = Kokkos::hwloc::get_available_numa_count();
      thread_total *= Kokkos::hwloc::get_available_cores_per_numa();
      thread_total *= Kokkos::hwloc::get_available_threads_per_core();
    }

    std::cout << "OpenMP: " << thread_total << std::endl;
    Kokkos::OpenMP::initialize( thread_total );
    std::cout << "available threads: " << omp_get_max_threads() << std::endl;
  }

  // Shuts the backend down, then requests a single OpenMP thread and checks
  // that the runtime reports exactly one.
  static void TearDownTestCase()
  {
    Kokkos::OpenMP::finalize();
    omp_set_num_threads(1);
    ASSERT_EQ( 1 , omp_get_max_threads() );
  }
};
// Runs the global-to-local id benchmark on the OpenMP backend for every size
// from begin_id_size up to and including end_id_size, multiplying by id_step.
TEST_F( openmp, global_2_local)
{
  std::cout << "OpenMP" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::OpenMP>(num_ids);
    num_ids *= Performance::id_step;
  }
}
// Runs the UnorderedMap insert benchmark on OpenMP with Near = true.
// The thread count computed here is only used to build the output file stem
// "openmp-<threads>-near".
TEST_F( openmp, unordered_map_performance_near)
{
  // 4 is the fallback when hwloc is not available.
  const unsigned num_openmp = Kokkos::hwloc::available()
    ? ( Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core() )
    : 4;

  std::ostringstream file_stem;
  file_stem << "openmp-";
  file_stem << num_openmp;
  file_stem << "-near";

  Perf::run_performance_tests<Kokkos::OpenMP,true>(file_stem.str());
}
// Runs the UnorderedMap insert benchmark on OpenMP with Near = false.
// The thread count computed here is only used to build the output file stem
// "openmp-<threads>-far".
TEST_F( openmp, unordered_map_performance_far)
{
  // 4 is the fallback when hwloc is not available.
  const unsigned num_openmp = Kokkos::hwloc::available()
    ? ( Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core() )
    : 4;

  std::ostringstream file_stem;
  file_stem << "openmp-";
  file_stem << num_openmp;
  file_stem << "-far";

  Perf::run_performance_tests<Kokkos::OpenMP,false>(file_stem.str());
}
} // namespace Performance

View File

@ -1,126 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <iomanip>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
#include <iomanip>
#include <sstream>
#include <string>
#include <fstream>
namespace Performance {
// Test fixture that initializes the Kokkos Threads backend once for the whole
// test case and finalizes it afterwards.
class threads : public ::testing::Test {
protected:
  // Configures std::cout formatting and starts Kokkos::Threads. The thread
  // count defaults to 4 and is replaced by numa * cores-per-numa *
  // threads-per-core when hwloc reports itself as available.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5) << std::scientific;

    unsigned thread_total = 4;
    if (Kokkos::hwloc::available()) {
      thread_total  = Kokkos::hwloc::get_available_numa_count();
      thread_total *= Kokkos::hwloc::get_available_cores_per_numa();
      thread_total *= Kokkos::hwloc::get_available_threads_per_core();
    }

    std::cout << "Threads: " << thread_total << std::endl;
    Kokkos::Threads::initialize( thread_total );
  }

  // Shuts the Threads backend down after the last test of the case.
  static void TearDownTestCase()
  {
    Kokkos::Threads::finalize();
  }
};
// Runs the global-to-local id benchmark on the Threads backend for every size
// from begin_id_size up to and including end_id_size, multiplying by id_step.
TEST_F( threads, global_2_local)
{
  std::cout << "Threads" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::Threads>(num_ids);
    num_ids *= Performance::id_step;
  }
}
// Runs the UnorderedMap insert benchmark on Threads with Near = true.
// The thread count computed here is only used to build the output file stem
// "threads-<threads>-near".
TEST_F( threads, unordered_map_performance_near)
{
  // 4 is the fallback when hwloc is not available.
  const unsigned num_threads = Kokkos::hwloc::available()
    ? ( Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core() )
    : 4;

  std::ostringstream file_stem;
  file_stem << "threads-";
  file_stem << num_threads;
  file_stem << "-near";

  Perf::run_performance_tests<Kokkos::Threads,true>(file_stem.str());
}
// Runs the UnorderedMap insert benchmark on Threads with Near = false.
// The thread count computed here is only used to build the output file stem
// "threads-<threads>-far".
TEST_F( threads, unordered_map_performance_far)
{
  // 4 is the fallback when hwloc is not available.
  const unsigned num_threads = Kokkos::hwloc::available()
    ? ( Kokkos::hwloc::get_available_numa_count() *
        Kokkos::hwloc::get_available_cores_per_numa() *
        Kokkos::hwloc::get_available_threads_per_core() )
    : 4;

  std::ostringstream file_stem;
  file_stem << "threads-";
  file_stem << num_threads;
  file_stem << "-far";

  Perf::run_performance_tests<Kokkos::Threads,false>(file_stem.str());
}
} // namespace Performance

View File

@ -1,262 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#include <impl/Kokkos_Timer.hpp>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <sstream>
namespace Perf {
template <typename Device, bool Near>
struct UnorderedMapTest
{
typedef Device execution_space;
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
typedef typename map_type::histogram_type histogram_type;
struct value_type {
uint32_t failed_count;
uint32_t max_list;
};
uint32_t capacity;
uint32_t inserts;
uint32_t collisions;
double seconds;
map_type map;
histogram_type histogram;
UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions)
: capacity(arg_capacity)
, inserts(arg_inserts)
, collisions(arg_collisions)
, seconds(0)
, map(capacity)
, histogram(map.get_histogram())
{
Kokkos::Impl::Timer wall_clock ;
wall_clock.reset();
value_type v = {};
int loop_count = 0;
do {
++loop_count;
v = value_type();
Kokkos::parallel_reduce(inserts, *this, v);
if (v.failed_count > 0u) {
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ;
map.rehash( new_capacity );
}
} while (v.failed_count > 0u);
seconds = wall_clock.seconds();
switch (loop_count)
{
case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break;
case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break;
default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break;
}
std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush;
histogram.calculate();
Device::fence();
}
void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out)
{
metrics_out << map.capacity() << " , ";
metrics_out << inserts/collisions << " , ";
metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , ";
metrics_out << inserts << " , ";
metrics_out << (map.failed_insert() ? "true" : "false") << " , ";
metrics_out << collisions << " , ";
metrics_out << 1e9*(seconds/inserts) << " , ";
metrics_out << seconds << std::endl;
length_out << map.capacity() << " , ";
length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
length_out << collisions << " , ";
histogram.print_length(length_out);
distance_out << map.capacity() << " , ";
distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
distance_out << collisions << " , ";
histogram.print_distance(distance_out);
block_distance_out << map.capacity() << " , ";
block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
block_distance_out << collisions << " , ";
histogram.print_block_distance(block_distance_out);
}
KOKKOS_INLINE_FUNCTION
void init( value_type & v ) const
{
v.failed_count = 0;
v.max_list = 0;
}
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & dst, const volatile value_type & src ) const
{
dst.failed_count += src.failed_count;
dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list;
}
KOKKOS_INLINE_FUNCTION
void operator()(uint32_t i, value_type & v) const
{
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
typename map_type::insert_result result = map.insert(key,i);
v.failed_count += !result.failed() ? 0 : 1;
v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position();
}
};
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
// Runs the UnorderedMap insert benchmark for a range of capacities and writes
// four CSV files named <base_file_name>-metrics.csv, -length.csv,
// -distance.csv and -block_distance.csv.
//
// The benchmark is only compiled when KOKKOS_COLLECT_UNORDERED_MAP_METRICS is
// defined; otherwise the function prints "skipping test" and returns.
//
// \tparam Device  Kokkos execution space to run on.
// \tparam Near    forwarded to UnorderedMapTest (selects the key pattern).
// \param base_file_name  common prefix of the four output files.
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
  std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
  std::string length_file_name = base_file_name + std::string("-length.csv");
  std::string distance_file_name = base_file_name + std::string("-distance.csv");
  std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv");

  std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out );
  std::ofstream length_out( length_file_name.c_str(), std::ofstream::out );
  std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out );
  std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out );

  /*
  const double test_ratios[] = {
     0.50
   , 0.75
   , 0.80
   , 0.85
   , 0.90
   , 0.95
   , 1.00
   , 1.25
   , 2.00
  };
  */
  const double test_ratios[] = { 1.00 };
  const int num_ratios = sizeof(test_ratios) / sizeof(double);

  /*
  const uint32_t collisions[] {
      1
    , 4
    , 16
    , 64
  };
  */
  const uint32_t collisions[] = { 16 };
  const int num_collisions = sizeof(collisions) / sizeof(uint32_t);

  // set up file headers
  metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl;

  // Each histogram row starts with capacity, percent full and the collision
  // ratio (see UnorderedMapTest::print), so the header needs all three
  // columns to line up with the data.
  length_out << "Capacity , Percent Full , Collision Ratio , ";
  distance_out << "Capacity , Percent Full , Collision Ratio , ";
  block_distance_out << "Capacity , Percent Full , Collision Ratio , ";

  // Write the separator before every bin label except the first, so the line
  // has no trailing separator and no backspace bytes end up in the file.
  for (int i=0; i<100; ++i) {
    const char * separator = (i == 0) ? "" : " , ";
    length_out << separator << i;
    distance_out << separator << i;
    block_distance_out << separator << i;
  }
  length_out << std::endl;
  distance_out << std::endl;
  block_distance_out << std::endl;

  Kokkos::Impl::Timer wall_clock ;
  for (int i=0; i < num_collisions ; ++i) {
    wall_clock.reset();
    std::cout << "Collisions: " << collisions[i] << std::endl;
    for (int j = 0; j < num_ratios; ++j) {
      std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "% " << std::flush;
      // capacities 2^14, 2^15, ... , 2^24
      for (uint32_t capacity = 1<<14; capacity < 1<<25; capacity = capacity << 1) {
        uint32_t inserts = static_cast<uint32_t>(test_ratios[j]*(capacity));
        std::cout << capacity << std::flush;
        // The benchmark itself runs inside the UnorderedMapTest constructor.
        UnorderedMapTest<Device, Near> test(capacity, inserts*collisions[i], collisions[i]);
        Device::fence();
        test.print(metrics_out, length_out, distance_out, block_distance_out);
      }
      // Terminal-only cosmetic: back up over the trailing "; " on the console.
      std::cout << "\b\b " << std::endl;
    }
    std::cout << " " << wall_clock.seconds() << " secs" << std::endl;
  }

  metrics_out.close();
  length_out.close();
  distance_out.close();
  block_distance_out.close();
#else
  (void)base_file_name;
  std::cout << "skipping test" << std::endl;
#endif
}
} // namespace Perf
#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP

View File

@ -1,437 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITSET_HPP
#define KOKKOS_BITSET_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>
#include <impl/Kokkos_Bitset_impl.hpp>
#include <stdexcept>
namespace Kokkos {
// Forward declarations. Device defaults to Kokkos::DefaultExecutionSpace for
// both bitset flavors.
template <typename Device = Kokkos::DefaultExecutionSpace >
class Bitset;
template <typename Device = Kokkos::DefaultExecutionSpace >
class ConstBitset;
// deep_copy overloads are declared up front so that the classes below can
// name them as friends.
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
/// A thread safe view to a bitset
///
/// The bits are stored in a View of unsigned blocks. Copying or assigning a
/// Bitset copies the View handle (m_blocks), not the bits; use deep_copy to
/// copy the bits themselves.
template <typename Device>
class Bitset
{
public:
  typedef Device execution_space;
  typedef unsigned size_type;

  /// Flag bits accepted by the scan_direction argument of the find_any_*
  /// functions.
  enum { BIT_SCAN_REVERSE = 1u };
  enum { MOVE_HINT_BACKWARD = 2u };

  /// The four valid combinations of the two flags above.
  enum {
      BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u
    , BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE
    , BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD
    , BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
  };

private:
  // block_size  := number of bits in one unsigned block
  // block_mask  := mask selecting the bit offset inside a block
  // block_shift := shift that converts a bit index into a block index
  enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
  enum { block_mask = block_size-1u };
  enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };

public:
  /// constructor
  /// arg_size := number of bits in the set
  Bitset(unsigned arg_size = 0u)
    : m_size(arg_size)
    , m_last_block_mask(0u)
    , m_blocks("Bitset", ((m_size + block_mask) >> block_shift) )
  {
    // Build a mask of the bits that are in use in the last block. It stays 0
    // when m_size is a multiple of block_size (every bit of the last block is used).
    for (int i=0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
      m_last_block_mask |= 1u << i;
    }
  }

  /// assignment
  Bitset<Device> & operator = (Bitset<Device> const & rhs)
  {
    this->m_size = rhs.m_size;
    this->m_last_block_mask = rhs.m_last_block_mask;
    this->m_blocks = rhs.m_blocks;
    return *this;
  }

  /// copy constructor
  Bitset( Bitset<Device> const & rhs)
    : m_size( rhs.m_size )
    , m_last_block_mask( rhs.m_last_block_mask )
    , m_blocks( rhs.m_blocks )
  {}

  /// number of bits in the set
  /// can be called from the host or the device
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned size() const
  { return m_size; }

  /// number of bits which are set to 1
  /// can only be called from the host
  unsigned count() const
  {
    Impl::BitsetCount< Bitset<Device> > f(*this);
    return f.apply();
  }

  /// set all bits to 1
  /// can only be called from the host
  void set()
  {
    Kokkos::deep_copy(m_blocks, ~0u );
    if (m_last_block_mask) {
      //clear the unused bits in the last block
      typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
      raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
    }
  }

  /// set all bits to 0
  /// can only be called from the host
  void reset()
  {
    Kokkos::deep_copy(m_blocks, 0u );
  }

  /// set all bits to 0
  /// can only be called from the host
  void clear()
  {
    Kokkos::deep_copy(m_blocks, 0u );
  }

  /// set i'th bit to 1
  /// can only be called from the device
  /// returns true if i is in range and the fetched block did not already have
  /// the bit set (assuming atomic_fetch_or returns the prior block value)
  KOKKOS_FORCEINLINE_FUNCTION
  bool set( unsigned i ) const
  {
    if ( i < m_size ) {
      unsigned * block_ptr = &m_blocks[ i >> block_shift ];
      const unsigned mask = 1u << static_cast<int>( i & block_mask );
      return !( atomic_fetch_or( block_ptr, mask ) & mask );
    }
    return false;
  }

  /// set i'th bit to 0
  /// can only be called from the device
  /// returns true if i is in range and the fetched block had the bit set
  /// (assuming atomic_fetch_and returns the prior block value)
  KOKKOS_FORCEINLINE_FUNCTION
  bool reset( unsigned i ) const
  {
    if ( i < m_size ) {
      unsigned * block_ptr = &m_blocks[ i >> block_shift ];
      const unsigned mask = 1u << static_cast<int>( i & block_mask );
      return atomic_fetch_and( block_ptr, ~mask ) & mask;
    }
    return false;
  }

  /// return true if the i'th bit is set to 1 (false when i is out of range)
  /// can only be called from the device
  KOKKOS_FORCEINLINE_FUNCTION
  bool test( unsigned i ) const
  {
    if ( i < m_size ) {
      const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]);
      const unsigned mask = 1u << static_cast<int>( i & block_mask );
      return block & mask;
    }
    return false;
  }

  /// used with find_any_set_near or find_any_unset_near functions
  /// returns the max number of times those functions should be called
  /// when searching for an available bit (one call per block)
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned max_hint() const
  {
    return m_blocks.dimension_0();
  }

  /// find a bit set to 1 near the hint
  /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
  /// and if result.first is false the result.second is a new hint
  /// a hint whose block index is out of range is treated as pointing into block 0
  KOKKOS_INLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
  {
    const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
    const unsigned offset = hint & block_mask;
    unsigned block = volatile_load(&m_blocks[ block_idx ]);
    // ignore the unused bits of a partially filled last block
    block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
    return find_any_helper(block_idx, offset, block, scan_direction);
  }

  /// find a bit set to 0 near the hint
  /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
  /// and if result.first is false the result.second is a new hint
  /// a hint whose block index is out of range is treated as pointing into block 0
  KOKKOS_INLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
  {
    // Clamp exactly like find_any_set_near so that an out-of-range hint
    // cannot index past the end of m_blocks.
    const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
    const unsigned offset = hint & block_mask;
    unsigned block = volatile_load(&m_blocks[ block_idx ]);
    // invert so that unset bits become the ones searched for; the unused bits
    // of a partially filled last block are masked out
    block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
    return find_any_helper(block_idx, offset, block, scan_direction);
  }

private:
  // Shared tail of the find_any_* functions: `block` holds a 1 for every
  // candidate bit. If it has none, return (false, next hint); otherwise
  // return (true, index of a candidate bit).
  KOKKOS_FORCEINLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const
  {
    Kokkos::pair<bool, unsigned> result( block > 0u, 0);

    if (!result.first) {
      result.second = update_hint( block_idx, offset, scan_direction );
    }
    else {
      result.second = scan_block(  (block_idx << block_shift)
                                 , offset
                                 , block
                                 , scan_direction
                                );
    }
    return result;
  }

  // Returns the global index of a set bit in `block`, where block_start is the
  // index of the block's first bit. The block is rotated by the offset before
  // scanning and the offset is added back afterwards, which presumably makes
  // the scan start at the hinted position (depends on Impl::rotate_right and
  // Impl::bit_scan_*, which are defined elsewhere).
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const
  {
    // for a reverse scan use offset-1 (modulo block_size)
    offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask;
    block = Impl::rotate_right(block, offset);
    return ((( !(scan_direction & BIT_SCAN_REVERSE) ?
               Impl::bit_scan_forward(block) :
               Impl::bit_scan_reverse(block)
             ) + offset
            ) & block_mask
           ) + block_start;
  }

  // Moves the hint to the neighbouring block (previous one when
  // MOVE_HINT_BACKWARD is set, next one otherwise), wrapping around at both
  // ends, and keeps the bit offset inside the block.
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
  {
    block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
    block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
    block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;

    return static_cast<unsigned>(block_idx)*block_size + offset;
  }

private:
  unsigned m_size;             // number of bits in the set
  unsigned m_last_block_mask;  // used bits of the last block, 0 if it is full
  View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;

private:
  template <typename DDevice>
  friend class Bitset;

  template <typename DDevice>
  friend class ConstBitset;

  template <typename Bitset>
  friend struct Impl::BitsetCount;

  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);

  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
/// a thread-safe view to a const bitset
/// i.e. can only test bits
template <typename Device>
class ConstBitset
{
public:
typedef Device execution_space;
typedef unsigned size_type;
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
enum { block_mask = block_size -1u };
enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };
public:
ConstBitset()
: m_size (0)
{}
ConstBitset(Bitset<Device> const& rhs)
: m_size(rhs.m_size)
, m_blocks(rhs.m_blocks)
{}
ConstBitset(ConstBitset<Device> const& rhs)
: m_size( rhs.m_size )
, m_blocks( rhs.m_blocks )
{}
ConstBitset<Device> & operator = (Bitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned size() const
{
return m_size;
}
unsigned count() const
{
Impl::BitsetCount< ConstBitset<Device> > f(*this);
return f.apply();
}
KOKKOS_FORCEINLINE_FUNCTION
bool test( unsigned i ) const
{
if ( i < m_size ) {
const unsigned block = m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return block & mask;
}
return false;
}
private:
unsigned m_size;
View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
private:
template <typename DDevice>
friend class ConstBitset;
template <typename Bitset>
friend struct Impl::BitsetCount;
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
/// Copies every block of src into dst, possibly across memory spaces.
/// Throws std::runtime_error when the two bitsets differ in size.
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
{
  typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > block_copier;

  const bool same_size = (dst.size() == src.size());
  if (!same_size) {
    throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
  }

  // Constructing the DeepCopy temporary performs the raw byte copy.
  block_copier( dst.m_blocks.ptr_on_device()
              , src.m_blocks.ptr_on_device()
              , sizeof(unsigned)*src.m_blocks.dimension_0()
              );
}
/// Copies every block of a read-only bitset into a mutable one, possibly
/// across memory spaces.
/// Throws std::runtime_error when the two bitsets differ in size.
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
  typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > block_copier;

  const bool same_size = (dst.size() == src.size());
  if (!same_size) {
    throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
  }

  // Constructing the DeepCopy temporary performs the raw byte copy.
  block_copier( dst.m_blocks.ptr_on_device()
              , src.m_blocks.ptr_on_device()
              , sizeof(unsigned)*src.m_blocks.dimension_0()
              );
}
/// Copies every block of one read-only bitset view into the storage behind
/// another, possibly across memory spaces.
/// Throws std::runtime_error when the two bitsets differ in size.
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
  typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > block_copier;

  const bool same_size = (dst.size() == src.size());
  if (!same_size) {
    throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
  }

  // Constructing the DeepCopy temporary performs the raw byte copy.
  block_copier( dst.m_blocks.ptr_on_device()
              , src.m_blocks.ptr_on_device()
              , sizeof(unsigned)*src.m_blocks.dimension_0()
              );
}
} // namespace Kokkos
#endif //KOKKOS_BITSET_HPP

View File

@ -1,840 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_DualView.hpp
/// \brief Declaration and definition of Kokkos::DualView.
///
/// This header file declares and defines Kokkos::DualView and its
/// related nonmember functions.
#ifndef KOKKOS_DUALVIEW_HPP
#define KOKKOS_DUALVIEW_HPP
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
namespace Kokkos {
/** \class DualView
* \brief Container to manage mirroring a Kokkos::View that lives
* in device memory with a Kokkos::View that lives in host memory.
*
* This class provides capabilities to manage data which exists in two
* memory spaces at the same time. It keeps views of the same layout
* on two memory spaces as well as modified flags for both
* allocations. Users are responsible for setting the modified flags
* manually if they change the data in either memory space, by calling
* the modify() method templated on the device where they modified the
* data. Users may synchronize data by calling the sync() method,
* templated on the device towards which they want to synchronize
* (i.e., the target of the one-way copy operation).
*
* The DualView class also provides convenience methods such as
* realloc, resize and capacity which call the appropriate methods of
* the underlying Kokkos::View objects.
*
* The four template arguments are the same as those of Kokkos::View.
* (Please refer to that class' documentation for a detailed
* description.)
*
* \tparam DataType The type of the entries stored in the container.
*
* \tparam Layout The array's layout in memory.
*
* \tparam Device The Kokkos Device type. If its memory space is
* not the same as the host's memory space, then DualView will
* contain two separate Views: one in device memory, and one in
* host memory. Otherwise, DualView will only store one View.
*
* \tparam MemoryTraits (optional) The user's intended memory access
* behavior. Please see the documentation of Kokkos::View for
* examples. The default suffices for most users.
*
* \note The template parameters are declared as Arg1Type, Arg2Type and
* Arg3Type and are forwarded unchanged to ViewTraits, which decides
* how they map onto layout, device and memory traits.
*/
template< class DataType ,
class Arg1Type = void ,
class Arg2Type = void ,
class Arg3Type = void>
class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
//! The Kokkos Host Device type;
typedef typename traits::host_mirror_space host_mirror_space ;
//! The type of a Kokkos::View on the device.
typedef View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > t_dev ;
/// \typedef t_host
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
typedef typename t_dev::HostMirror t_host ;
//! The type of a const View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
typename traits::memory_traits > t_dev_const ;
/// \typedef t_host_const
/// \brief The type of a const View host mirror of \c t_dev_const.
typedef typename t_dev_const::HostMirror t_host_const;
//! The type of a const, random-access View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryRandomAccess > t_dev_const_randomread ;
/// \typedef t_host_const_randomread
/// \brief The type of a const, random-access View host mirror of
/// \c t_dev_const_randomread.
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
//! The type of an unmanaged View on the device.
typedef View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_um;
//! The type of an unmanaged View host mirror of \c t_dev_um.
typedef View< typename t_host::data_type ,
typename t_host::array_layout ,
typename t_host::device_type ,
MemoryUnmanaged> t_host_um;
//! The type of a const unmanaged View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_const_um;
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
typedef View<typename t_host::const_data_type,
typename t_host::array_layout,
typename t_host::device_type,
MemoryUnmanaged> t_host_const_um;
//@}
//! \name The two View instances.
//@{
t_dev d_view;
t_host h_view;
//@}
//! \name Counters to keep track of changes ("modified" flags)
//@{
// Both counters are rank-0 Views declared on the host mirror's execution
// space. sync(), modify(), realloc() and resize() read and write them
// through operator().
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
//@}
//! \name Constructors
//@{
/// \brief Empty constructor.
///
/// Both device and host View objects are constructed using their
/// default constructors. The "modified" flags are both initialized
/// to "unmodified."
DualView () :
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{}
/// \brief Constructor that allocates View objects on both host and device.
///
/// This constructor works like the analogous constructor of View.
/// The first argument is a string label, which is entirely for your
/// benefit. (Different DualView objects may have the same label if
/// you like.) The arguments that follow are the dimensions of the
/// View objects. For example, if the View has three dimensions,
/// the first three integer arguments will be nonzero, and you may
/// omit the integer arguments that follow.
DualView (const std::string& label,
const size_t n0 = 0,
const size_t n1 = 0,
const size_t n2 = 0,
const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
const size_t n6 = 0,
const size_t n7 = 0)
: d_view (label, n0, n1, n2, n3, n4, n5, n6, n7)
, h_view (create_mirror_view (d_view)) // without UVM, host View mirrors
, modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device"))
, modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{}
//! Converting constructor from a DualView with (possibly) different
//! template arguments (shallow copy). Because it is a template, it is
//! not the class's copy constructor; copying a DualView of the exact
//! same type uses the implicitly generated copy constructor.
//! The Views and both modified counters are initialized from \c src.
template<class SS, class LS, class DS, class MS>
DualView (const DualView<SS,LS,DS,MS>& src) :
d_view (src.d_view),
h_view (src.h_view),
modified_device (src.modified_device),
modified_host (src.modified_host)
{}
/// \brief Create DualView from existing device and host View objects.
///
/// This constructor assumes that the device and host View objects
/// are synchronized. You, the caller, are responsible for making
/// sure this is the case before calling this constructor. After
/// this constructor returns, you may use DualView's sync() and
/// modify() methods to ensure synchronization of the View objects.
///
/// \param d_view_ Device View
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
DualView (const t_dev& d_view_, const t_host& h_view_) :
d_view (d_view_),
h_view (h_view_),
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{
// Verify that the two Views passed in have matching shapes.
Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
}
//@}
//! \name Methods for synchronizing, marking as modified, and getting Views.
//@{
/// \brief Return a View on a specific device \c Device.
///
/// Please don't be afraid of the if_c expression in the return
/// value's type. That just tells the method what the return type
/// should be: t_dev if the memory space of the \c Device template
/// parameter matches the memory space of t_dev, else t_host.
///
/// For example, suppose you create a DualView on Cuda, like this:
/// \code
/// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type;
/// dual_view_type DV ("my dual view", 100);
/// \endcode
/// If you want to get the CUDA device View, do this:
/// \code
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
/// \endcode
/// and if you want to get the host mirror of that View, do this:
/// \code
/// typedef typename Kokkos::HostSpace::execution_space host_device_type;
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
/// \endcode
template< class Device >
KOKKOS_INLINE_FUNCTION
const typename Impl::if_c<
Impl::is_same<typename t_dev::memory_space,
typename Device::memory_space>::value,
t_dev,
t_host>::type& view () const
{
return Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
t_dev,
t_host >::select (d_view , h_view);
}
/// \brief Update data on device or host only if data in the other
/// space has been marked as modified.
///
/// If the memory space of \c Device is the same as the memory space of
/// this DualView's device View, then copy data from host to device.
/// Otherwise, copy data from device to host. In either case, only copy
/// if the source of the copy has been modified (its counter is nonzero
/// and not smaller than the target's counter).
///
/// This is a one-way synchronization only. If the target of the
/// copy has been modified, this operation will discard those
/// modifications. When a copy is performed, both device and host
/// modified flags are reset to zero.
///
/// \note This method doesn't know on its own whether you modified
/// the data in either View. You must manually mark modified data
/// as modified, by calling the modify() method with the
/// appropriate template parameter.
///
/// This overload is enabled when the data type is non-const (the
/// enable_if condition on the defaulted parameter).
template<class Device>
void sync( const typename Impl::enable_if<
( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
( Impl::is_same< Device , int>::value)
, int >::type& = 0)
{
// dev == 1 when Device's memory space matches the device View's.
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value ,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
deep_copy (d_view, h_view);
modified_host() = modified_device() = 0;
}
} else { // hopefully Device is the same as DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
deep_copy (h_view, d_view);
modified_host() = modified_device() = 0;
}
}
// When host and device Views share one memory space, fence both
// execution spaces.
if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
t_dev::execution_space::fence();
t_host::execution_space::fence();
}
}
/// \brief sync() overload enabled when the data type is const.
///
/// No data can be copied into a const View, so this overload only
/// checks the modified counters and throws a runtime exception if a
/// copy towards \c Device would have been required.
template<class Device>
void sync ( const typename Impl::enable_if<
( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
( Impl::is_same< Device , int>::value)
, int >::type& = 0 )
{
// dev == 1 when Device's memory space matches the device View's.
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
}
} else { // hopefully Device is the same as DualView's host type
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
}
}
}
/// \brief Mark data as modified on the given device \c Device.
///
/// If the memory space of \c Device is the same as the memory space of
/// this DualView's device View, then mark the device's data as
/// modified. Otherwise, mark the host's data as modified.
///
/// The marked side's counter is set to one more than the larger of the
/// two counters, so it becomes strictly greater than the other side's.
template<class Device>
void modify () {
// dev == 1 when Device's memory space matches the device View's.
const unsigned int dev =
Impl::if_c<
Impl::is_same<
typename t_dev::memory_space,
typename Device::memory_space>::value,
unsigned int,
unsigned int>::select (1, 0);
if (dev) { // if Device is the same as DualView's device type
// Increment the device's modified count.
modified_device () = (modified_device () > modified_host () ?
modified_device () : modified_host ()) + 1;
} else { // hopefully Device is the same as DualView's host type
// Increment the host's modified count.
modified_host () = (modified_device () > modified_host () ?
modified_device () : modified_host ()) + 1;
}
}
//@}
//! \name Methods for reallocating or resizing the View objects.
//@{
/// \brief Reallocate both View objects.
///
/// This discards any existing contents of the objects, and resets
/// their modified flags. It does <i>not</i> copy the old contents
/// of either View into the new View objects.
void realloc( const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ) {
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
// The host View is re-created as a mirror of the new device View.
h_view = create_mirror_view( d_view );
/* Reset dirty flags */
modified_device() = modified_host() = 0;
}
/// \brief Resize both views, copying old contents into new if necessary.
///
/// This method only copies the old contents into the new View
/// objects for the device which was last marked as modified.
/// If the device counter is greater than or equal to the host counter,
/// the device View is resized and the host View is replaced by a fresh
/// mirror. Otherwise the device View is reallocated and the old host
/// View is deep-copied into a new mirror.
void resize( const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ) {
if(modified_device() >= modified_host()) {
/* Resize on Device */
::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
h_view = create_mirror_view( d_view );
/* Mark Device copy as modified */
modified_device() = modified_device()+1;
} else {
/* Realloc on Device */
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
t_host temp_view = create_mirror_view( d_view );
/* Remap on Host */
Kokkos::deep_copy( temp_view , h_view );
h_view = temp_view;
/* Mark Host copy as modified */
modified_host() = modified_host()+1;
}
}
//@}
//! \name Methods for getting capacity, stride, or dimension(s).
//@{
//! The allocation size (same as Kokkos::View::capacity).
//! Reported from the device View: span() when
//! KOKKOS_USING_EXPERIMENTAL_VIEW is defined, capacity() otherwise.
size_t capacity() const {
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
return d_view.span();
#else
return d_view.capacity();
#endif
}
//! Get stride(s) for each dimension.
//! Forwards to the device View's stride(); \c stride_ is the output array.
template< typename iType>
void stride(iType* stride_) const {
d_view.stride(stride_);
}
/* \brief return size of dimension 0 */
size_t dimension_0() const {return d_view.dimension_0();}
/* \brief return size of dimension 1 */
size_t dimension_1() const {return d_view.dimension_1();}
/* \brief return size of dimension 2 */
size_t dimension_2() const {return d_view.dimension_2();}
/* \brief return size of dimension 3 */
size_t dimension_3() const {return d_view.dimension_3();}
/* \brief return size of dimension 4 */
size_t dimension_4() const {return d_view.dimension_4();}
/* \brief return size of dimension 5 */
size_t dimension_5() const {return d_view.dimension_5();}
/* \brief return size of dimension 6 */
size_t dimension_6() const {return d_view.dimension_6();}
/* \brief return size of dimension 7 */
size_t dimension_7() const {return d_view.dimension_7();}
//@}
};
} // namespace Kokkos
//
// Partial specializations of Kokkos::subview() for DualView objects.
//
namespace Kokkos {
namespace Impl {
/// \brief Computes the DualView type produced by Kokkos::subview() on a
/// DualView source.
///
/// The nested \c type is a DualView whose rank equals the number of
/// subview arguments that are ranges (OutputRank). The source layout is
/// kept when one of the conditions listed at OutputViewLayout holds;
/// otherwise Kokkos::LayoutStride is used. Unused trailing arguments are
/// passed as \c void.
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
// Vi == 1 when the i-th subview argument is void (i.e. not supplied).
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
// The source view rank must be equal to the input argument rank
// Once a void argument is encountered all subsequent arguments must be void.
// Both conditions are checked at compile time: the rank must equal the
// index of the first void argument and also 8 minus the number of voids.
enum { InputRank =
Impl::StaticAssert<( SrcViewType::rank ==
( V0 ? 0 : (
V1 ? 1 : (
V2 ? 2 : (
V3 ? 3 : (
V4 ? 4 : (
V5 ? 5 : (
V6 ? 6 : (
V7 ? 7 : 8 ))))))) ))
&&
( SrcViewType::rank ==
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
>::value ? SrcViewType::rank : 0 };
// Ri == 1 when the i-th subview argument is a range (as reported by
// ViewOffsetRange<...>::is_range).
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
// The rank of the resulting subview is the number of range arguments.
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Reverse
// R0_rev is the range flag of the last supplied argument
// (index InputRank-1), or 0 when InputRank is 0.
enum { R0_rev = 0 == InputRank ? 0u : (
1 == InputRank ? unsigned(R0) : (
2 == InputRank ? unsigned(R1) : (
3 == InputRank ? unsigned(R2) : (
4 == InputRank ? unsigned(R3) : (
5 == InputRank ? unsigned(R4) : (
6 == InputRank ? unsigned(R5) : (
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
typedef typename SrcViewType::array_layout SrcViewLayout ;
// Choose array layout, attempting to preserve original layout if at all possible.
typedef typename Impl::if_c<
( // Same Layout IF
// OutputRank 0
( OutputRank == 0 )
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
// Choose data type as a purely dynamic rank array to accommodate a runtime range.
// One '*' is appended to the value type per output dimension.
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
typename SrcViewType::value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
// Choose space.
// If the source view's template arg1 or arg2 is a space then use it,
// otherwise use the source view's execution space.
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
>::type >::type OutputSpace ;
public:
// If keeping the layout then match non-data type arguments
// else keep execution space and memory traits.
typedef typename
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
, typename SrcViewType::memory_traits >
>::type type ;
};
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
/// \brief Subview of a DualView, one index/range argument.
///
/// Calls subview() with the same argument on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src  Source DualView.
/// \param arg0 Index or range for dimension 0.
/// \return A default-constructed result DualView with its four members
///         assigned as described above.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , void , void , void
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , void , void , void
                                    , void , void , void , void
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 );
  result.h_view = subview( src.h_view , arg0 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
/// \brief Subview of a DualView, two index/range arguments.
///
/// Calls subview() with the same arguments on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src        Source DualView.
/// \param arg0,arg1  Index or range for dimensions 0 and 1.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , void , void
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , ArgType1 , void , void
                                    , void , void , void , void
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 , arg1 );
  result.h_view = subview( src.h_view , arg0 , arg1 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
/// \brief Subview of a DualView, three index/range arguments.
///
/// Calls subview() with the same arguments on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src         Source DualView.
/// \param arg0..arg2  Index or range for dimensions 0 through 2.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , void
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , ArgType1 , ArgType2 , void
                                    , void , void , void , void
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
/// \brief Subview of a DualView, four index/range arguments.
///
/// Calls subview() with the same arguments on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src         Source DualView.
/// \param arg0..arg3  Index or range for dimensions 0 through 3.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , ArgType1 , ArgType2 , ArgType3
                                    , void , void , void , void
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
/// \brief Subview of a DualView, five index/range arguments.
///
/// Calls subview() with the same arguments on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src         Source DualView.
/// \param arg0..arg4  Index or range for dimensions 0 through 4.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , ArgType1 , ArgType2 , ArgType3
                                    , ArgType4 , void , void , void
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
/// \brief Subview of a DualView, six index/range arguments.
///
/// Calls subview() with the same arguments on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src         Source DualView.
/// \param arg0..arg5  Index or range for dimensions 0 through 5.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 , class ArgType5 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , ArgType5 , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 ,
         const ArgType5 & arg5 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , ArgType1 , ArgType2 , ArgType3
                                    , ArgType4 , ArgType5 , void , void
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
/// \brief Subview of a DualView, seven index/range arguments.
///
/// Calls subview() with the same arguments on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src         Source DualView.
/// \param arg0..arg6  Index or range for dimensions 0 through 6.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 , class ArgType5 , class ArgType6 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , ArgType5 , ArgType6 , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 ,
         const ArgType5 & arg5 ,
         const ArgType6 & arg6 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , ArgType1 , ArgType2 , ArgType3
                                    , ArgType4 , ArgType5 , ArgType6 , void
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
/// \brief Subview of a DualView, eight index/range arguments.
///
/// Calls subview() with the same arguments on the device View and on the
/// host View of \c src, and assigns the source's modified-counter Views
/// to the result.
///
/// \param src         Source DualView.
/// \param arg0..arg7  Index or range for dimensions 0 through 7.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , ArgType5 , ArgType6 , ArgType7
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 ,
         const ArgType5 & arg5 ,
         const ArgType6 & arg6 ,
         const ArgType7 & arg7 )
{
  typedef DualView<D,A1,A2,A3> source_type ;
  typedef typename Impl::ViewSubview< source_type
                                    , ArgType0 , ArgType1 , ArgType2 , ArgType3
                                    , ArgType4 , ArgType5 , ArgType6 , ArgType7
                                    >::type result_type ;

  result_type result ;

  // Slice the device and host Views with identical arguments.
  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 , arg7 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 , arg7 );

  // Carry over the modification counters of the source.
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
//
// Overload of Kokkos::deep_copy() for DualView objects.
//
/// \brief Deep-copy one side of \c src into the matching side of \c dst.
///
/// The side is chosen from the source's modified counters: the device
/// View is copied when the device counter is greater than or equal to the
/// host counter, otherwise the host View is copied. The side that was
/// written in \c dst is then marked as modified via modify().
///
/// \param dst Destination DualView, deliberately taken by value.
/// \param src Source DualView.
template< class DT , class DL , class DD , class DM ,
          class ST , class SL , class SD , class SM >
void
deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
           const DualView<ST,SL,SD,SM>& src )
{
  typedef DualView<DT,DL,DD,DM> dst_type ;

  const bool host_is_newer = src.modified_device () < src.modified_host ();

  if ( host_is_newer ) {
    // Host side holds the most recent data: copy host to host.
    deep_copy (dst.h_view, src.h_view);
    dst.template modify< typename dst_type::host_mirror_space > ();
    return ;
  }

  // Device side is at least as recent: copy device to device.
  deep_copy (dst.d_view, src.d_view);
  dst.template modify< typename dst_type::device_type > ();
}
} // namespace Kokkos
#endif

View File

@ -1,173 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_FUNCTIONAL_HPP
#define KOKKOS_FUNCTIONAL_HPP
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_Functional_impl.hpp>
namespace Kokkos {
// These should work for most types
/// \brief Hash functor over the raw bytes of a value.
///
/// Both call operators pass the address of the value and sizeof(T) to
/// Impl::MurmurHash3_x86_32; the one-argument form uses seed 0.
template <typename T>
struct pod_hash
{
  typedef uint32_t result_type;
  typedef T        argument_type;
  typedef T        first_argument_type;
  typedef uint32_t second_argument_type;

  /// Hash \c t with a seed of 0.
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t operator()(const T & t) const
  {
    return Impl::MurmurHash3_x86_32( &t, sizeof(T), 0);
  }

  /// Hash \c t with the caller-supplied \c seed.
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t operator()(const T & t, uint32_t seed) const
  {
    return Impl::MurmurHash3_x86_32( &t, sizeof(T), seed);
  }
};
/// \brief Equality functor that forwards the addresses of both operands
/// to Impl::bitwise_equal.
template <typename T>
struct pod_equal_to
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns the result of Impl::bitwise_equal(&a,&b).
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return Impl::bitwise_equal(&a,&b);
  }
};
/// \brief Inequality functor: the negation of Impl::bitwise_equal applied
/// to the addresses of both operands.
template <typename T>
struct pod_not_equal_to
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns the negated result of Impl::bitwise_equal(&a,&b).
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return !Impl::bitwise_equal(&a,&b);
  }
};
/// \brief Binary predicate wrapping T's operator==.
template <typename T>
struct equal_to
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns <tt>a == b</tt>.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return (a == b);
  }
};
/// \brief Binary predicate wrapping T's operator!=.
template <typename T>
struct not_equal_to
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns <tt>a != b</tt>.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return (a != b);
  }
};
/// \brief Binary predicate wrapping T's operator>.
template <typename T>
struct greater
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns <tt>a > b</tt>.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return (a > b);
  }
};
/// \brief Binary predicate wrapping T's operator<.
template <typename T>
struct less
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns <tt>a < b</tt>.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return (a < b);
  }
};
/// \brief Binary predicate wrapping T's operator>=.
template <typename T>
struct greater_equal
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns <tt>a >= b</tt>.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return (a >= b);
  }
};
/// \brief Binary predicate wrapping T's operator<=.
template <typename T>
struct less_equal
{
  typedef bool result_type;
  typedef T    first_argument_type;
  typedef T    second_argument_type;

  /// Returns <tt>a <= b</tt>.
  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(const T & a, const T & b) const
  {
    return (a <= b);
  }
};
} // namespace Kokkos
#endif //KOKKOS_FUNCTIONAL_HPP

View File

@ -1,531 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
#define KOKKOS_SEGMENTED_VIEW_HPP_
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <cstdio>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
namespace Kokkos {
namespace Experimental {
namespace Impl {
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
struct delete_segmented_view;
/// \brief Generic fallback that ignores the requested size and does nothing.
///
/// Memory spaces that need to adjust a device limit provide an explicit
/// specialization of this template.
template< class MemorySpace >
inline void DeviceSetAllocatableMemorySize( size_t /* requested size, unused */ )
{
}
#if defined( KOKKOS_HAVE_CUDA )
/// \brief Make sure the CUDA device malloc heap limit is at least \c size.
///
/// Queries cudaLimitMallocHeapSize and, when the current limit is smaller
/// than \c size, raises it to twice \c size. The body is empty when the
/// translation unit is not compiled with __CUDACC__ defined.
///
/// NOTE(review): the CUDA return codes are still ignored, as before.
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
#ifdef __CUDACC__
  // Start from 0 so that a failed query leads to an attempt to raise the
  // limit instead of a read of an uninitialized value.
  size_t size_limit = 0;
  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
  if(size_limit<size)
    cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
  // The former trailing re-query of the limit was dropped: its result was never read.
#else
  (void) size; // nothing to do without the CUDA compiler
#endif
}
/// \brief Make sure the CUDA device malloc heap limit is at least \c size.
///
/// Queries cudaLimitMallocHeapSize and, when the current limit is smaller
/// than \c size, raises it to twice \c size. The body is empty when the
/// translation unit is not compiled with __CUDACC__ defined.
///
/// NOTE(review): the CUDA return codes are still ignored, as before.
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
#ifdef __CUDACC__
  // Start from 0 so that a failed query leads to an attempt to raise the
  // limit instead of a read of an uninitialized value.
  size_t size_limit = 0;
  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
  if(size_limit<size)
    cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
  // The former trailing re-query of the limit was dropped: its result was never read.
#else
  (void) size; // nothing to do without the CUDA compiler
#endif
}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
}
template< class DataType ,
class Arg1Type = void ,
class Arg2Type = void ,
class Arg3Type = void>
class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
//! The type of a Kokkos::View on the device.
typedef Kokkos::View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
Kokkos::MemoryUnmanaged > t_dev ;
private:
Kokkos::View<t_dev*,typename traits::memory_space> segments_;
Kokkos::View<int,typename traits::memory_space> realloc_lock;
Kokkos::View<int,typename traits::memory_space> nsegments_;
size_t segment_length_;
size_t segment_length_m1_;
int max_segments_;
int segment_length_log2;
// Dimensions, cardinality, capacity, and offset computation for
// multidimensional array view of contiguous memory.
// Inherits from Impl::Shape
typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
, typename traits::array_layout
> offset_map_type ;
offset_map_type m_offset_map ;
typedef Kokkos::View< typename traits::array_intrinsic_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > array_type ;
typedef Kokkos::View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > const_type ;
typedef Kokkos::View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > non_const_type ;
typedef Kokkos::View< typename traits::non_const_data_type ,
typename traits::array_layout ,
HostSpace ,
void > HostMirror ;
/// Extent of dimension 0 for the \c Accessible == true case: the segment
/// counter is read directly and multiplied by the segment length.
template< bool Accessible >
KOKKOS_INLINE_FUNCTION
typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
dimension_0_intern() const
{
  const int allocated_segments = nsegments_();
  return allocated_segments * segment_length_ ;
}
/// Extent of dimension 0 for the \c Accessible == false case: the segment
/// counter is first copied into a host-side integer with Impl::DeepCopy.
template< bool Accessible >
KOKKOS_INLINE_FUNCTION
typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
dimension_0_intern() const
{
  // Stays 0 when __CUDA_ARCH__ is defined, because the copy is compiled out.
  int host_segment_count = 0 ;
#if ! defined( __CUDA_ARCH__ )
  Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >(
    & host_segment_count , nsegments_.ptr_on_device() , sizeof(int) );
#endif
  return host_segment_count * segment_length_ ;
}
public:
//! Rank of the view, taken from traits::rank.
enum { Rank = traits::rank };
/// \brief Return a copy of the offset map held by this view.
KOKKOS_INLINE_FUNCTION
offset_map_type shape() const
{
  return m_offset_map ;
}
/// \brief Return the (current) size of dimension 0.
///
/// Dispatches to the dimension_0_intern overload that matches whether the
/// active execution space can access the view's memory space.
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
  enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
         Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
  // Return the size_type value directly. Routing it through an int
  // temporary truncated extents that do not fit in an int.
  return SegmentedView::dimension_0_intern< Accessible >();
}
/// \brief Size of dimension 1 (offset map N1).
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_1() const
{ return m_offset_map.N1 ; }

/// \brief Size of dimension 2 (offset map N2).
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_2() const
{ return m_offset_map.N2 ; }

/// \brief Size of dimension 3 (offset map N3).
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_3() const
{ return m_offset_map.N3 ; }

/// \brief Size of dimension 4 (offset map N4).
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_4() const
{ return m_offset_map.N4 ; }

/// \brief Size of dimension 5 (offset map N5).
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_5() const
{ return m_offset_map.N5 ; }

/// \brief Size of dimension 6 (offset map N6).
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_6() const
{ return m_offset_map.N6 ; }

/// \brief Size of dimension 7 (offset map N7).
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_7() const
{ return m_offset_map.N7 ; }
/// \brief Return the product of dimension_0() and the offset map extents
///        N1 through N7.
KOKKOS_INLINE_FUNCTION
typename traits::size_type size() const
{
  return dimension_0()
       * m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4
       * m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
}
/// \brief Return the size of dimension \c i.
///
/// Dimension 0 comes from dimension_0(); every other index is looked up
/// in the offset map through Kokkos::Impl::dimension.
template< typename iType >
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension( const iType & i ) const
{
  if ( i == 0 ) {
    return dimension_0();
  }
  return Kokkos::Impl::dimension( m_offset_map , i );
}
/// \brief Return segments_.dimension_0() multiplied by the offset map
///        extents N1 through N7.
///
/// Declared const (it modifies nothing) so it can be called on a const
/// view, e.g. from inside a functor's const operator().
///
/// NOTE(review): the product does not include segment_length_; confirm
/// whether the intended unit is segments or entries.
KOKKOS_INLINE_FUNCTION
typename traits::size_type capacity() const {
  return segments_.dimension_0() *
    m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
    m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
}
/// \brief Return the current extent of dimension 0 divided by the segment
///        length, i.e. the number of segments counted so far.
///
/// Declared const (it modifies nothing) so it can be called on a const view.
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_num_segments() const {
  enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
         Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
  // Keep the extent in size_type; an int temporary truncated large extents.
  const typename traits::size_type extent_0 = SegmentedView::dimension_0_intern< Accessible >();
  return extent_0/segment_length_ ;
}
/// \brief Return max_segments_, the segment limit computed by the constructor.
///
/// Declared const (it modifies nothing) so it can be called on a const view.
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_max_segments() const {
  return max_segments_;
}
/// \brief Constructor that allocates View objects with an initial length of 0.
///
/// This constructor works mostly like the analogous constructor of View.
/// The first argument is a string label, which is entirely for your
/// benefit. (Different SegmentedView objects may have the same label if
/// you like.) The second argument 'view_length' is the size of the segments.
/// This number must be a nonzero power of two; otherwise a runtime
/// exception is thrown. The third argument n0 is the maximum
/// value for the first dimension of the segmented view. The maximal allocatable
/// number of Segments is thus: (n0+view_length-1)/view_length.
/// The arguments that follow are the extents of the remaining dimensions
/// (1-7) of the View objects. For example, for a View with 3 runtime dimensions,
/// the first 4 integer arguments will be nonzero:
/// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView
/// with a maximum of 306 segments of dimension (32768,8,4). The logical size of
/// the segmented view is (n,8,4) with n between 0 and 10000000.
/// You may omit the integer arguments that follow.
template< class LabelType >
SegmentedView(const LabelType & label ,
              const size_t view_length ,
              const size_t n0 ,
              const size_t n1 = 0 ,
              const size_t n2 = 0 ,
              const size_t n3 = 0 ,
              const size_t n4 = 0 ,
              const size_t n5 = 0 ,
              const size_t n6 = 0 ,
              const size_t n7 = 0
             ): segment_length_(view_length),segment_length_m1_(view_length-1)
{
  // Validate first. A zero length would otherwise leave the log2 at -1
  // (an invalid shift count) and divide by zero below; the bit test also
  // avoids shifting an int past its width for very large lengths.
  if( view_length == 0 || ( view_length & ( view_length - 1 ) ) != 0 )
    Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");

  // log2 of the (power of two) segment length, used as a shift in operator().
  segment_length_log2 = -1;
  size_t l = segment_length_;
  while(l>0) {
    l>>=1;
    segment_length_log2++;
  }

  // Number of segments needed to cover n0 entries, rounded up.
  max_segments_ = static_cast<int>((n0+segment_length_m1_)/segment_length_);
  Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
  segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
  realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
  nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
  m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
}
/// \brief Copy constructor: copies every member of \c src, including the
///        View handles for the segment table, the lock and the segment counter.
KOKKOS_INLINE_FUNCTION
SegmentedView( const SegmentedView & src )
  : segments_( src.segments_ )
  , realloc_lock( src.realloc_lock )
  , nsegments_( src.nsegments_ )
  , segment_length_( src.segment_length_ )
  , segment_length_m1_( src.segment_length_m1_ )
  , max_segments_( src.max_segments_ )
  , segment_length_log2( src.segment_length_log2 )
  , m_offset_map( src.m_offset_map )
{
}
/// \brief Copy assignment: assigns every member of \c src to this object
///        and returns a reference to this object.
KOKKOS_INLINE_FUNCTION
SegmentedView & operator= ( const SegmentedView & src )
{
  // View handles
  segments_           = src.segments_ ;
  realloc_lock        = src.realloc_lock ;
  nsegments_          = src.nsegments_ ;

  // Segment bookkeeping
  segment_length_     = src.segment_length_ ;
  segment_length_m1_  = src.segment_length_m1_ ;
  max_segments_       = src.max_segments_ ;
  segment_length_log2 = src.segment_length_log2 ;

  // Shape
  m_offset_map        = src.m_offset_map ;

  return *this ;
}
/// \brief Destructor.
///
/// When the tracker of segments_ is reference counting and reports a
/// count of 1, fences, copies the segment counter to the host and launches
/// Impl::delete_segmented_view over that many indices. Otherwise it does
/// nothing beyond destroying the members.
~SegmentedView()
{
  const bool last_reference =
    segments_.tracker().ref_counting() &&
    ( static_cast<size_t>( segments_.tracker().ref_count() ) == 1u );

  if ( ! last_reference ) { return; }

  Kokkos::fence();

  typedef typename Kokkos::View<int,typename traits::execution_space>::HostMirror host_counter_type ;
  host_counter_type h_nviews("h_nviews");
  Kokkos::deep_copy(h_nviews,nsegments_);

  Kokkos::parallel_for( h_nviews() ,
    Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this) );
}
/// \brief Return (by value) the unmanaged view stored at position \c i of
///        the segment table.
KOKKOS_INLINE_FUNCTION
t_dev get_segment( const int & i ) const
{
  return segments_[ i ] ;
}
/// \brief Grow the view, from within a team, until dimension 0 holds at
///        least \c growSize entries.
///
/// If \c growSize exceeds max_segments_*segment_length_ a message is
/// printed and the call returns without growing (and without reaching the
/// team barrier). Otherwise team rank 0 takes realloc_lock, allocates new
/// segments with new[] until the view is large enough, and releases the
/// lock; all team members then meet at team_barrier().
///
/// \param team_member  Team handle; must provide team_rank() and team_barrier().
/// \param growSize     Requested minimum extent of dimension 0.
template< class MemberType>
KOKKOS_INLINE_FUNCTION
void grow (MemberType& team_member, const size_t& growSize) const {
  const size_t max_size = max_segments_*segment_length_;
  if (growSize>max_size) {
    // %lu expects unsigned long; size_t is not unsigned long on every platform.
    printf ("Exceeding maxSize: %lu %lu\n",
            static_cast<unsigned long>(growSize), static_cast<unsigned long>(max_size));
    return;
  }
  if(team_member.team_rank()==0) {
    bool too_small = growSize > segment_length_ * nsegments_();
    if (too_small) {
      while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
        ; // get the lock
      too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
      // Number of entries in one segment; does not change between iterations.
      const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
                                m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
      while(too_small) {
        typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
        // Publish the segment in the table before bumping the counter.
        segments_(nsegments_()) =
          t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
        nsegments_()++;
        too_small = growSize > segment_length_ * nsegments_();
      }
      realloc_lock() = 0; //release the lock
    }
  }
  team_member.team_barrier();
}
/// \brief Grow the view until dimension 0 holds at least \c growSize
///        entries, without taking realloc_lock.
///
/// If \c growSize exceeds max_segments_*segment_length_ a message is
/// printed and the call returns without growing. Otherwise new segments
/// are allocated with new[] until the view is large enough.
///
/// \param growSize  Requested minimum extent of dimension 0.
KOKKOS_INLINE_FUNCTION
void grow_non_thread_safe (const size_t& growSize) const {
  const size_t max_size = max_segments_*segment_length_;
  if (growSize>max_size) {
    // %lu expects unsigned long; size_t is not unsigned long on every platform.
    printf ("Exceeding maxSize: %lu %lu\n",
            static_cast<unsigned long>(growSize), static_cast<unsigned long>(max_size));
    return;
  }
  // Number of entries in one segment; does not change between iterations.
  const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
                            m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
  bool too_small = growSize > segment_length_ * nsegments_();
  while(too_small) {
    typename traits::non_const_value_type* const ptr =
      new typename traits::non_const_value_type[alloc_size];
    // Store the segment in the table before bumping the counter.
    segments_(nsegments_()) =
      t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
             m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
             m_offset_map.N6, m_offset_map.N7);
    nsegments_()++;
    too_small = growSize > segment_length_ * nsegments_();
  }
}
/// \brief Rank-1 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the index inside that segment.
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ );
}
/// \brief Rank-2 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the first index inside that segment.
template< typename iType0 , typename iType1 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          traits::rank == 2 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 );
}
/// \brief Rank-3 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the first index inside that segment.
template< typename iType0 , typename iType1 , typename iType2 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          traits::rank == 3 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 );
}
/// \brief Rank-4 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the first index inside that segment.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          traits::rank == 4 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 );
}
/// \brief Rank-5 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the first index inside that segment.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          traits::rank == 5 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 );
}
/// \brief Rank-6 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the first index inside that segment.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 , typename iType5 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          std::is_integral<iType5>::value &&
                          traits::rank == 6 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 , const iType5 & i5 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 , i5 );
}
/// \brief Rank-7 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the first index inside that segment.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 , typename iType5 , typename iType6 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          std::is_integral<iType5>::value &&
                          std::is_integral<iType6>::value &&
                          traits::rank == 7 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 , i5 , i6 );
}
/// \brief Rank-8 element access.
///
/// i0 >> segment_length_log2 selects the segment and
/// i0 & segment_length_m1_ is the first index inside that segment.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 , typename iType5 , typename iType6 , typename iType7 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          std::is_integral<iType5>::value &&
                          std::is_integral<iType6>::value &&
                          std::is_integral<iType7>::value &&
                          traits::rank == 8 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
}
};
namespace Impl {
/// \brief Functor that frees the storage of one segment per work index.
///
/// Holds a copy of the SegmentedView and, for index \c i, calls delete[]
/// on the pointer of segment \c i.
template< class DataType , class Arg1Type , class Arg2Type , class Arg3Type >
struct delete_segmented_view
{
  typedef SegmentedView< DataType , Arg1Type , Arg2Type , Arg3Type > view_type ;
  typedef typename view_type::execution_space                        execution_space ;

  view_type view_ ;

  delete_segmented_view( view_type view )
    : view_( view )
  {
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( int i ) const
  {
    delete [] view_.get_segment( i ).ptr_on_device() ;
  }
};
}
}
}
#endif
#endif

View File

@ -1,226 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STATICCRSGRAPH_HPP
#define KOKKOS_STATICCRSGRAPH_HPP
#include <string>
#include <vector>
#include <Kokkos_Core.hpp>
namespace Kokkos {
/// \class StaticCrsGraph
/// \brief Compressed row storage array.
///
/// \tparam DataType The type of stored entries.  If a StaticCrsGraph is
///   used as the graph of a sparse matrix, then this is usually an
///   integer type, the type of the column indices in the sparse
///   matrix.
///
/// \tparam Arg1Type The second template parameter, corresponding
///   either to the Device type (if there are no more template
///   parameters) or to the Layout type (if there is at least one more
///   template parameter).
///
/// \tparam Arg2Type The third template parameter, which if provided
///   corresponds to the Device type.
///
/// \tparam SizeType The type of row offsets.  Usually the default
///   parameter suffices.  However, setting a nondefault value is
///   necessary in some cases, for example, if you want to have a
///   sparse matrix with dimensions (and therefore column indices)
///   that fit in \c int, but want to store more than <tt>INT_MAX</tt>
///   entries in the sparse matrix.
///
/// A row has a range of entries:
/// <ul>
/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
/// <li> <tt> entries( entry ,            i2 , i3 , ... ); </tt> </li>
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
/// </ul>
template< class DataType ,
          class Arg1Type ,
          class Arg2Type = void ,
          typename SizeType = typename ViewTraits< DataType* , Arg1Type , Arg2Type , void >::size_type >
class StaticCrsGraph
{
private:

  typedef ViewTraits< DataType* , Arg1Type , Arg2Type , void > traits ;

public:

  typedef DataType                         data_type ;
  typedef typename traits::array_layout    array_layout ;
  typedef typename traits::execution_space execution_space ;
  typedef typename traits::device_type     device_type ;
  typedef SizeType                         size_type ;

  typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type ;
  typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror ;

  typedef View< const size_type* , array_layout , device_type > row_map_type ;
  typedef View< DataType* ,        array_layout , device_type > entries_type ;

  entries_type entries ;
  row_map_type row_map ;

  //! Construct an empty graph: both views are default constructed.
  StaticCrsGraph()
    : entries()
    , row_map()
  {}

  //! Copy constructor (shallow copy of both views).
  StaticCrsGraph( const StaticCrsGraph & rhs )
    : entries( rhs.entries )
    , row_map( rhs.row_map )
  {}

  //! Construct from an entries view and a row map view.
  template< class EntriesType , class RowMapType >
  StaticCrsGraph( const EntriesType & entries_ , const RowMapType & row_map_ )
    : entries( entries_ )
    , row_map( row_map_ )
  {}

  /** \brief  Assign to a view of the rhs array.
   *          If the old view is the last view
   *          then allocated memory is deallocated.
   */
  StaticCrsGraph & operator= ( const StaticCrsGraph & rhs )
  {
    entries = rhs.entries ;
    row_map = rhs.row_map ;
    return *this ;
  }

  /** \brief  Destroy this view of the array.
   *          If the last view then allocated memory is deallocated.
   */
  ~StaticCrsGraph() {}

  /** \brief  Number of rows: one less than the length of row_map,
   *          or zero when row_map is empty.
   */
  KOKKOS_INLINE_FUNCTION
  size_type numRows() const
  {
    if ( row_map.dimension_0 () == 0 ) {
      return static_cast<size_type> (0) ;
    }
    return row_map.dimension_0 () - static_cast<size_type> (1) ;
  }
};
//----------------------------------------------------------------------------
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< InputSizeType > & input );
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< std::vector< InputSizeType > > & input );
//----------------------------------------------------------------------------
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
typename SizeType >
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
typename SizeType >
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// \brief Reduction functor that computes the largest value stored in the
///        entries view of a graph.
///
/// The reduction value starts at 0 (see init), so the result is never
/// smaller than 0.
template< class GraphType >
struct StaticCrsGraphMaximumEntry
{
  typedef typename GraphType::execution_space execution_space ;
  typedef typename GraphType::data_type       value_type ;

  const typename GraphType::entries_type entries ;

  StaticCrsGraphMaximumEntry( const GraphType & graph )
    : entries( graph.entries )
  {}

  /// Fold entry \c i into the running maximum.
  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned i , value_type & update ) const
  {
    if ( update < entries(i) ) {
      update = entries(i) ;
    }
  }

  /// Initial value of the reduction.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & update ) const
  {
    update = 0 ;
  }

  /// Combine two partial maxima, keeping the larger one in \c update.
  KOKKOS_INLINE_FUNCTION
  void join( volatile       value_type & update ,
             volatile const value_type & input ) const
  {
    if ( update < input ) {
      update = input ;
    }
  }
};
}
template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
{
typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
DataType result = 0 ;
Kokkos::parallel_reduce( graph.entries.dimension_0(),
FunctorType(graph), result );
return result ;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_STATICCRSGRAPH_HPP */

View File

@ -1,848 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_UnorderedMap.hpp
/// \brief Declaration and definition of Kokkos::UnorderedMap.
///
/// This header file declares and defines Kokkos::UnorderedMap and its
/// related nonmember functions.
#ifndef KOKKOS_UNORDERED_MAP_HPP
#define KOKKOS_UNORDERED_MAP_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>
#include <Kokkos_Bitset.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_UnorderedMap_impl.hpp>
#include <iostream>
#include <stdint.h>
#include <stdexcept>
namespace Kokkos {
/// Sentinel index with all bits set (~0u); UnorderedMapInsertResult uses it
/// as the index of a result whose insert failed.
enum { UnorderedMapInvalidIndex = ~0u };
/// \brief First element of the return value of UnorderedMap::insert().
///
/// Inserting an element into an UnorderedMap is not guaranteed to
/// succeed.  There are three possible conditions, each reported by one of
/// the accessors of this class:
/// <ol>
/// <li> failed(): The insert failed.  This usually
///      means that the UnorderedMap ran out of space. </li>
/// <li> success(): The insert succeeded, and the key
///      did <i>not</i> exist in the table before. </li>
/// <li> existing(): The insert succeeded, and the key
///      <i>did</i> exist in the table before.  The new value was
///      ignored and the old value was left in place. </li>
/// </ol>
class UnorderedMapInsertResult
{
private:
  // The top three bits of m_status are flags; the remaining bits hold the
  // list position.
  enum Status{
     SUCCESS = 1u << 31
   , EXISTING = 1u << 30
   , FREED_EXISTING = 1u << 29
   , LIST_LENGTH_MASK = ~(SUCCESS | EXISTING | FREED_EXISTING)
  };

public:
  /// Did the map successfully insert the key/value pair
  KOKKOS_FORCEINLINE_FUNCTION
  bool success() const
  {
    return (m_status & SUCCESS) != 0u;
  }

  /// Was the key already present in the map
  KOKKOS_FORCEINLINE_FUNCTION
  bool existing() const
  {
    return (m_status & EXISTING) != 0u;
  }

  /// Did the map fail to insert the key due to insufficient capacity
  KOKKOS_FORCEINLINE_FUNCTION
  bool failed() const
  {
    return m_index == UnorderedMapInvalidIndex;
  }

  /// Did the map lose a race condition to insert a duplicate key/value pair
  /// where an index was claimed that needed to be released
  KOKKOS_FORCEINLINE_FUNCTION
  bool freed_existing() const
  {
    return (m_status & FREED_EXISTING) != 0u;
  }

  /// How many iterations through the insert loop did it take before the
  /// map returned
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t list_position() const
  {
    return (m_status & LIST_LENGTH_MASK);
  }

  /// Index where the key can be found as long as the insert did not fail
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t index() const
  {
    return m_index;
  }

  /// Default result: invalid index and no status bits set.
  KOKKOS_FORCEINLINE_FUNCTION
  UnorderedMapInsertResult()
    : m_index(UnorderedMapInvalidIndex)
    , m_status(0)
  {}

  /// Add one to the list position unless it has already reached
  /// LIST_LENGTH_MASK.
  KOKKOS_FORCEINLINE_FUNCTION
  void increment_list_position()
  {
    if (list_position() < LIST_LENGTH_MASK) {
      m_status += 1u;
    }
  }

  /// Record that the key already existed at index \c i, keeping the
  /// current list position.
  KOKKOS_FORCEINLINE_FUNCTION
  void set_existing(uint32_t i, bool arg_freed_existing)
  {
    uint32_t new_status = EXISTING | list_position();
    if (arg_freed_existing) {
      new_status |= FREED_EXISTING;
    }
    m_index  = i;
    m_status = new_status;
  }

  /// Record a successful insert at index \c i, keeping the current list
  /// position.
  KOKKOS_FORCEINLINE_FUNCTION
  void set_success(uint32_t i)
  {
    const uint32_t new_status = SUCCESS | list_position();
    m_index  = i;
    m_status = new_status;
  }

private:
  uint32_t m_index;
  uint32_t m_status;
};
/// \class UnorderedMap
/// \brief Thread-safe, performance-portable lookup table.
///
/// This class provides a lookup table. In terms of functionality,
/// this class compares to std::unordered_map (new in C++11).
/// "Unordered" means that keys are not stored in any particular
/// order, unlike (for example) std::map. "Thread-safe" means that
/// lookups, insertion, and deletion are safe to call by multiple
/// threads in parallel. "Performance-portable" means that parallel
/// performance of these operations is reasonable, on multiple
/// hardware platforms. Platforms on which performance has been
/// tested include conventional Intel x86 multicore processors, Intel
/// Xeon Phi ("MIC"), and NVIDIA GPUs.
///
/// Parallel performance portability entails design decisions that
/// might differ from one's expectation for a sequential interface.
/// This particularly affects insertion of single elements. In an
/// interface intended for sequential use, insertion might reallocate
/// memory if the original allocation did not suffice to hold the new
/// element. In this class, insertion does <i>not</i> reallocate
/// memory. This means that it might fail. insert() returns an
/// \c insert_result (UnorderedMapInsertResult) whose status indicates
/// whether the insert failed. There are three possible conditions:
/// <ol>
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
/// means that the UnorderedMap ran out of space. </li>
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
/// did <i>not</i> exist in the table before. </li>
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
/// <i>did</i> exist in the table before. The new value was
/// ignored and the old value was left in place. </li>
/// </ol>
///
/// \tparam Key Type of keys of the lookup table. If \c const, users
/// are not allowed to add or remove keys, though they are allowed
/// to change values. In that case, the implementation may make
/// optimizations specific to the <tt>Device</tt>. For example, if
/// <tt>Device</tt> is \c Cuda, it may use texture fetches to access
/// keys.
///
/// \tparam Value Type of values stored in the lookup table. You may use
/// \c void here, in which case the table will be a set of keys. If
/// \c const, users are not allowed to change entries.
/// In that case, the implementation may make
/// optimizations specific to the \c Device, such as using texture
/// fetches to access values.
///
/// \tparam Device The Kokkos Device type.
///
/// \tparam Hasher Definition of the hash function for instances of
/// <tt>Key</tt>. The default will calculate a bitwise hash.
///
/// \tparam EqualTo Definition of the equality function for instances of
/// <tt>Key</tt>. The default will do a bitwise equality comparison.
///
template < typename Key
, typename Value
, typename Device = Kokkos::DefaultExecutionSpace
, typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>
, typename EqualTo = pod_equal_to<typename Impl::remove_const<Key>::type>
>
class UnorderedMap
{
private:
// Host mirror space matching Device; used below to define HostMirror.
typedef typename ViewTraits<Key,Device,void,void>::host_mirror_space host_mirror_space ;
public:
//! \name Public types and constants
//@{
//key_types
// declared_* keeps the const-ness the user wrote; key_type strips it and
// const_key_type adds it back unconditionally.
typedef Key declared_key_type;
typedef typename Impl::remove_const<declared_key_type>::type key_type;
typedef typename Impl::add_const<key_type>::type const_key_type;
//value_types
typedef Value declared_value_type;
typedef typename Impl::remove_const<declared_value_type>::type value_type;
typedef typename Impl::add_const<value_type>::type const_value_type;
typedef Device execution_space;
typedef Hasher hasher_type;
typedef EqualTo equal_to_type;
typedef uint32_t size_type;
//map_types
// The same map with each combination of const key / const value.
typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type;
typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type;
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
// True when Value is (possibly const) void: the map stores keys only.
static const bool is_set = Impl::is_same<void,value_type>::value;
// True when Key was declared const.
static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
// True when Value was declared const; a set always counts as const-valued.
static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
// Non-const key, and either a set or a non-const value.
static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
// Const key with a non-const value.
static const bool is_modifiable_map = has_const_key && !has_const_value;
// Const key and const value.
static const bool is_const_map = has_const_key && has_const_value;
typedef UnorderedMapInsertResult insert_result;
typedef UnorderedMap<Key,Value,host_mirror_space,Hasher,EqualTo> HostMirror;
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
//@}
private:
// All-ones size_type value; marks "no entry" in the hash and next-index lists.
enum { invalid_index = ~static_cast<size_type>(0) };
// Element type actually stored in m_values: a set (Value == void) stores int.
typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type;
// Keys are writable only for insertable maps; otherwise a const,
// RandomAccess view is used.
typedef typename Impl::if_c< is_insertable_map
, View< key_type *, execution_space>
, View< const key_type *, execution_space, MemoryTraits<RandomAccess> >
>::type key_type_view;
// Values are writable for insertable and modifiable maps.
typedef typename Impl::if_c< is_insertable_map || is_modifiable_map
, View< impl_value_type *, execution_space>
, View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> >
>::type value_type_view;
// View type shared by m_hash_lists and m_next_index.
typedef typename Impl::if_c< is_insertable_map
, View< size_type *, execution_space>
, View< const size_type *, execution_space, MemoryTraits<RandomAccess> >
>::type size_type_view;
// Bitset type of m_available_indexes (tested by valid_at()).
typedef typename Impl::if_c< is_insertable_map
, Bitset< execution_space >
, ConstBitset< execution_space>
>::type bitset_type;
// Positions of the status flags inside m_scalars.
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
enum { num_scalars = 3 };
typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view;
public:
//! \name Public member functions
//@{
/// \brief Default constructor.
///
/// Value-initializes every member; no capacity is requested here.
/// NOTE(review): capacity() is then whatever a default-constructed bitset
/// reports (presumably 0) -- confirm against Bitset.
UnorderedMap()
: m_bounded_insert()
, m_hasher()
, m_equal_to()
, m_size()
, m_available_indexes()
, m_hash_lists()
, m_next_index()
, m_keys()
, m_values()
, m_scalars()
{}
/// \brief Constructor
///
/// \param capacity_hint [in] Initial guess of how many unique keys will be inserted into the map.
///   The actual capacity is calculate_capacity(capacity_hint).
/// \param hasher [in] Hasher function for \c Key instances. The
/// default value usually suffices.
/// \param equal_to [in] Equality function for \c Key instances.
///
/// \throws std::runtime_error if this map type is not insertable.
UnorderedMap( size_type capacity_hint, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type() )
: m_bounded_insert(true)
, m_hasher(hasher)
, m_equal_to(equal_to)
, m_size()
, m_available_indexes(calculate_capacity(capacity_hint))
, m_hash_lists(ViewAllocateWithoutInitializing("UnorderedMap hash list"), Impl::find_hash_size(capacity()))
, m_next_index(ViewAllocateWithoutInitializing("UnorderedMap next index"), capacity()+1) // +1 so that the *_at functions can always return a valid reference
, m_keys("UnorderedMap keys",capacity()+1)
// A set needs no per-entry values, so a single placeholder element is allocated.
, m_values("UnorderedMap values",(is_set? 1 : capacity()+1))
, m_scalars("UnorderedMap scalars")
{
if (!is_insertable_map) {
throw std::runtime_error("Cannot construct a non-insertable (i.e. const key_type) unordered_map");
}
// Both index lists were allocated without initialization; mark every slot empty.
Kokkos::deep_copy(m_hash_lists, invalid_index);
Kokkos::deep_copy(m_next_index, invalid_index);
}
/// Clears the failed-insert flag that insert() raises when it gives up
/// looking for a free entry.
void reset_failed_insert_flag()
{
reset_flag(failed_insert_idx);
}
/// Builds a histogram_type (Impl::UnorderedMapHistogram) from this map.
histogram_type get_histogram()
{
return histogram_type(*this);
}
//! Clear all entries in the table.
///
/// Re-enables bounded insertion and, when the map has capacity, clears the
/// occupancy bitset, marks every hash-list / next-index slot as empty, resets
/// the stored keys and values to their default-constructed state and zeroes
/// all status flags. A map with zero capacity only has its bounded-insert mode
/// reset.
void clear()
{
  m_bounded_insert = true;
  if (capacity() == 0) return;
  m_available_indexes.clear();
  Kokkos::deep_copy(m_hash_lists, invalid_index);
  Kokkos::deep_copy(m_next_index, invalid_index);
  {
    const key_type tmp = key_type();
    Kokkos::deep_copy(m_keys,tmp);
  }
  // Reset the values unconditionally. The previous guard, `if (is_set)`, only
  // reset the values of a key-only set (whose value view is a one-element
  // placeholder) and left the values of a real map untouched, so stale values
  // stayed observable through value_at() after clear().
  {
    const impl_value_type tmp = impl_value_type();
    Kokkos::deep_copy(m_values,tmp);
  }
  Kokkos::deep_copy(m_scalars, 0);
}
/// \brief Change the capacity of the map
///
/// Forwards to the two-argument overload, which uses the current size() as a
/// lower bound for the requested capacity and, when the map is not empty,
/// runs Impl::UnorderedMapRehash to populate the newly allocated map from the
/// current one. Bounded insertion is enabled on the result only when the map
/// currently has no capacity or no entries.
///
/// This is <i>not</i> a device function; it may <i>not</i> be
/// called in a parallel kernel.
bool rehash(size_type requested_capacity = 0)
{
  return rehash(requested_capacity, (capacity() == 0) || (size() == 0u));
}
/// \brief Change the capacity of the map and choose its insert mode.
///
/// \param requested_capacity [in] Desired capacity; raised to size() if smaller.
/// \param bounded_insert     [in] Value stored in m_bounded_insert afterwards.
/// \return false when the map type is not insertable (nothing is changed),
///         true otherwise.
///
/// This is <i>not</i> a device function.
bool rehash(size_type requested_capacity, bool bounded_insert)
{
  if (!is_insertable_map) {
    return false;
  }
  const size_type entries = size();
  if (requested_capacity < entries) {
    requested_capacity = entries;
  }
  insertable_map_type rebuilt(requested_capacity, m_hasher, m_equal_to);
  if (entries != 0u) {
    // Unbounded insertion while the existing entries are transferred.
    rebuilt.m_bounded_insert = false;
    Impl::UnorderedMapRehash<insertable_map_type> transfer(rebuilt, *this);
    transfer.apply();
  }
  rebuilt.m_bounded_insert = bounded_insert;
  *this = rebuilt;
  return true;
}
/// \brief The number of entries in the table.
///
/// This method has undefined behavior when erasable() is true.
///
/// Note that this is not a device function; it cannot be called in
/// a parallel kernel. The count is cached in \c m_size and is recomputed
/// from the occupancy bitset only while the modified flag is set; the flag
/// is cleared after recomputing.
size_type size() const
{
  if (capacity() != 0u) {
    if (modified()) {
      m_size = m_available_indexes.count();
      reset_flag(modified_idx);
    }
    return m_size;
  }
  return 0u;
}
/// \brief Whether the failed-insert flag is currently set.
///
/// The flag is raised by insert() when it gives up searching for a free
/// entry and is cleared by reset_failed_insert_flag(). It is a single
/// boolean flag, not a count of failed calls.
///
/// This is <i>not</i> a device function; it may <i>not</i> be
/// called in a parallel kernel. The flag is read back from the
/// map's scalars view via get_flag().
bool failed_insert() const
{
return get_flag(failed_insert_idx);
}
/// Whether the map is currently in its erase phase (between begin_erase()
/// and end_erase()). Always false for a map type that is not insertable.
bool erasable() const
{
  if (!is_insertable_map) {
    return false;
  }
  return get_flag(erasable_idx);
}
/// Switches the map into its erase phase.
///
/// \return true when the map was not already erasable, false otherwise.
///         The erasable flag is only raised for insertable map types, and
///         is bracketed by execution-space fences.
bool begin_erase()
{
  const bool entering = !erasable();
  if (entering && is_insertable_map) {
    execution_space::fence();
    set_flag(erasable_idx);
    execution_space::fence();
  }
  return entering;
}
bool end_erase()
{
bool result = erasable();
if (is_insertable_map && result) {
execution_space::fence();
Impl::UnorderedMapErase<declared_map_type> f(*this);
f.apply();
execution_space::fence();
reset_flag(erasable_idx);
}
return result;
}
/// \brief The maximum number of entries that the table can hold.
///
/// Equal to the size of the occupancy bitset (one bit per entry slot).
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_FORCEINLINE_FUNCTION
size_type capacity() const
{ return m_available_indexes.size(); }
/// \brief The number of hash table "buckets."
///
/// This is different than the number of entries that the table can
/// hold. Each key hashes to an index in [0, hash_capacity() - 1].
/// That index can hold zero or more entries. This class decides
/// what hash_capacity() should be, given the user's upper bound on
/// the number of entries the table must be able to hold
/// (the capacity constructor sizes it with Impl::find_hash_size(capacity())).
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
size_type hash_capacity() const
{ return m_hash_lists.dimension_0(); }
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// \brief Attempt to insert the key \c k (and value \c v) into the map.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel. As discussed in the class documentation, it need not
/// succeed. The return value tells you if it did.
///
/// \param k [in] The key to attempt to insert.
/// \param v [in] The corresponding value to attempt to insert. If
/// using this class as a set (with Value = void), then you need not
/// provide this value.
///
/// \return An insert_result. It is left in its default-constructed state
/// (invalid index) when the map is not insertable, has zero capacity, is in
/// its erase phase, or no free entry could be claimed; otherwise
/// set_existing() or set_success() has been applied to it.
KOKKOS_INLINE_FUNCTION
insert_result insert(key_type const& k, impl_value_type const&v = impl_value_type()) const
{
insert_result result;
// Inserting is refused outright for non-insertable maps, empty maps and
// while the erasable flag is set.
if ( !is_insertable_map || capacity() == 0u || m_scalars((int)erasable_idx) ) {
return result;
}
// Raise the modified flag (read by size()); written only when not already set.
if ( !m_scalars((int)modified_idx) ) {
m_scalars((int)modified_idx) = true;
}
int volatile & failed_insert_ref = m_scalars((int)failed_insert_idx) ;
const size_type hash_value = m_hasher(k);
const size_type hash_list = hash_value % m_hash_lists.dimension_0();
// curr_ptr always points at the link that would have to be updated to append
// to this bucket's list: first the bucket head, later a m_next_index slot.
size_type * curr_ptr = & m_hash_lists[ hash_list ];
// Entry claimed by this call but not yet linked into the list.
size_type new_index = invalid_index ;
// Scale the bucket number into the entry index range; the product is
// computed in double rather than in size_type.
size_type index_hint = static_cast<size_type>( (static_cast<double>(hash_list) * capacity()) / m_hash_lists.dimension_0());
size_type find_attempts = 0;
enum { bounded_find_attempts = 32u };
// Bounded mode caps the number of unsuccessful free-entry searches at 32;
// otherwise the cap is the bitset's max_hint().
const size_type max_attempts = (m_bounded_insert && (bounded_find_attempts < m_available_indexes.max_hint()) ) ?
bounded_find_attempts :
m_available_indexes.max_hint();
bool not_done = true ;
#if defined( __MIC__ )
#pragma noprefetch
#endif
while ( not_done ) {
// Continue searching the unordered list for this key,
// list will only be appended during insert phase.
// Need volatile_load as other threads may be appending.
size_type curr = volatile_load(curr_ptr);
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
#if defined( __MIC__ )
#pragma noprefetch
#endif
// Walk the list until the key is found or the end of the list is reached.
while ( curr != invalid_index && ! m_equal_to( volatile_load(&m_keys[curr]), k) ) {
result.increment_list_position();
index_hint = curr;
curr_ptr = &m_next_index[curr];
curr = volatile_load(curr_ptr);
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
}
//------------------------------------------------------------
// If key already present then return that index.
if ( curr != invalid_index ) {
const bool free_existing = new_index != invalid_index;
if ( free_existing ) {
// Previously claimed an unused entry that was not inserted.
// Release this unused entry immediately.
if (!m_available_indexes.reset(new_index) ) {
printf("Unable to free existing\n");
}
}
result.set_existing(curr, free_existing);
not_done = false ;
}
//------------------------------------------------------------
// Key is not currently in the map.
// If the thread has claimed an entry try to insert now.
else {
//------------------------------------------------------------
// If have not already claimed an unused entry then do so now.
if (new_index == invalid_index) {
bool found = false;
// use the hash_list as the flag for the search direction
Kokkos::tie(found, index_hint) = m_available_indexes.find_any_unset_near( index_hint, hash_list );
// Give up, raising the failed-insert flag, once the search has come back
// empty max_attempts times. Otherwise try to claim index_hint; presumably
// set() returns true only when this thread set the bit.
if ( !found && ++find_attempts >= max_attempts ) {
failed_insert_ref = true;
not_done = false ;
}
else if (m_available_indexes.set(index_hint) ) {
new_index = index_hint;
// Set key and value
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]);
m_keys[new_index] = k ;
if (!is_set) {
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]);
m_values[new_index] = v ;
}
// Do not proceed until key and value are updated in global memory
memory_fence();
}
}
// An entry is already claimed, but the failed-insert flag has been raised:
// make this the last pass. The append below is still attempted once.
// NOTE(review): if that final append loses the race, new_index appears to
// stay claimed in the bitset without being linked or released -- confirm.
else if (failed_insert_ref) {
not_done = false;
}
// Attempt to append claimed entry into the list.
// Another thread may also be trying to append the same list so protect with atomic.
// curr is invalid_index in this branch, so the comparison succeeds exactly
// when the link still held invalid_index and was swapped to new_index.
if ( new_index != invalid_index &&
curr == atomic_compare_exchange(curr_ptr, static_cast<size_type>(invalid_index), new_index) ) {
// Succeeded in appending
result.set_success(new_index);
not_done = false ;
}
}
} // while ( not_done )
return result ;
}
/// \brief Mark the entry for key \c k as erased.
///
/// Only has an effect on an insertable map with non-zero capacity whose
/// erasable flag is set (see begin_erase()). The entry's bit in the occupancy
/// bitset is reset; the hash lists are not touched here.
///
/// \return true when \c k was found at a valid index and its bit was reset,
///         false otherwise.
KOKKOS_INLINE_FUNCTION
bool erase(key_type const& k) const
{
  if ( !is_insertable_map || capacity() == 0u || !m_scalars(static_cast<int>(erasable_idx)) ) {
    return false;
  }
  // Raise the modified flag only when it is not already set.
  if ( !m_scalars(static_cast<int>(modified_idx)) ) {
    m_scalars(static_cast<int>(modified_idx)) = true;
  }
  const size_type position = find(k);
  if ( !valid_at(position) ) {
    return false;
  }
  m_available_indexes.reset(position);
  return true;
}
/// \brief Find the given key \c k, if it exists in the table.
///
/// \return If the key exists in the table, the index of the
/// value corresponding to that key; otherwise, an invalid index.
/// The index is taken from the hash list only; it is not checked against
/// the occupancy bitset (use valid_at() or exists() for that).
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
size_type find( const key_type & k) const
{
  // Head of the bucket's list, or invalid_index for a map without capacity.
  size_type curr = 0u < capacity() ? m_hash_lists( m_hasher(k) % m_hash_lists.dimension_0() ) : invalid_index ;
  KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
  while (curr != invalid_index && !m_equal_to( m_keys[curr], k) ) {
    curr = m_next_index[curr];
    // Prefetch the key that the next iteration will compare. Issuing the
    // prefetch before advancing (as was done previously) only re-requested
    // the key that had just been read; insert() already prefetches after
    // advancing.
    KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
  }
  return curr;
}
/// \brief Does the key exist in the map
///
/// True when find(k) yields an index whose bit is set in the occupancy
/// bitset.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
bool exists( const key_type & k) const
{
return valid_at(find(k));
}
/// \brief Get the value with \c i as its direct index.
///
/// \param i [in] Index directly into the array of entries. An index at or
///   beyond capacity() is redirected to slot capacity(); for a set, slot 0 is
///   always used.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
///
/// 'const value_type' via Cuda texture fetch must return by value.
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::if_c< (is_set || has_const_value), impl_value_type, impl_value_type &>::type
value_at(size_type i) const
{
  if (is_set) {
    return m_values[0];
  }
  const size_type limit = capacity();
  return m_values[ (i < limit) ? i : limit ];
}
/// \brief Get the key with \c i as its direct index.
///
/// \param i [in] Index directly into the array of entries. An index at or
///   beyond capacity() is redirected to slot capacity().
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_FORCEINLINE_FUNCTION
key_type key_at(size_type i) const
{
  const size_type limit = capacity();
  const size_type slot = (i < limit) ? i : limit;
  return m_keys[slot];
}
/// \brief Whether entry slot \c i is marked in the occupancy bitset.
///
/// insert() sets the bit when it claims a slot; erase() resets it.
KOKKOS_FORCEINLINE_FUNCTION
bool valid_at(size_type i) const
{
return m_available_indexes.test(i);
}
/// \brief Converting copy constructor between maps on the same Device.
///
/// Enabled only when Impl::UnorderedMapCanAssign allows a map of
/// <SKey,SValue> to be assigned to this map type. Every member is
/// copy-constructed from \c src.
/// NOTE(review): copying Kokkos views presumably shares the underlying
/// allocation rather than duplicating it -- confirm against View semantics.
template <typename SKey, typename SValue>
UnorderedMap( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src,
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value,int>::type = 0
)
: m_bounded_insert(src.m_bounded_insert)
, m_hasher(src.m_hasher)
, m_equal_to(src.m_equal_to)
, m_size(src.m_size)
, m_available_indexes(src.m_available_indexes)
, m_hash_lists(src.m_hash_lists)
, m_next_index(src.m_next_index)
, m_keys(src.m_keys)
, m_values(src.m_values)
, m_scalars(src.m_scalars)
{}
/// \brief Converting assignment between maps on the same Device.
///
/// Enabled only when Impl::UnorderedMapCanAssign allows a map of
/// <SKey,SValue> to be assigned to this map type. Every member is assigned
/// from the corresponding member of \c src.
/// \return A reference to this map.
template <typename SKey, typename SValue>
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value
,declared_map_type & >::type
operator=( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src)
{
m_bounded_insert = src.m_bounded_insert;
m_hasher = src.m_hasher;
m_equal_to = src.m_equal_to;
m_size = src.m_size;
m_available_indexes = src.m_available_indexes;
m_hash_lists = src.m_hash_lists;
m_next_index = src.m_next_index;
m_keys = src.m_keys;
m_values = src.m_values;
m_scalars = src.m_scalars;
return *this;
}
/// \brief Make this map a freshly allocated copy of \c src.
///
/// \c src may live on a different device (SDevice); its key and value types
/// must match this map's up to const-ness. Nothing happens when both maps
/// already refer to the same hash-list allocation. Otherwise new storage of
/// the same dimensions is allocated, the contents are copied with raw
/// DeepCopy calls between the two memory spaces, and the result is assigned
/// to \c *this.
template <typename SKey, typename SValue, typename SDevice>
typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
>::type
create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
{
if (m_hash_lists.ptr_on_device() != src.m_hash_lists.ptr_on_device()) {
insertable_map_type tmp;
tmp.m_bounded_insert = src.m_bounded_insert;
tmp.m_hasher = src.m_hasher;
tmp.m_equal_to = src.m_equal_to;
tmp.m_size = src.size();
// Allocate storage matching the source's dimensions; the contents are
// filled in by the copies below.
tmp.m_available_indexes = bitset_type( src.capacity() );
tmp.m_hash_lists = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap hash list"), src.m_hash_lists.dimension_0() );
tmp.m_next_index = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap next index"), src.m_next_index.dimension_0() );
tmp.m_keys = key_type_view( ViewAllocateWithoutInitializing("UnorderedMap keys"), src.m_keys.dimension_0() );
tmp.m_values = value_type_view( ViewAllocateWithoutInitializing("UnorderedMap values"), src.m_values.dimension_0() );
tmp.m_scalars = scalars_view("UnorderedMap scalars");
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
// Byte-wise copies from the source memory space into this map's memory space.
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
raw_deep_copy(tmp.m_keys.ptr_on_device(), src.m_keys.ptr_on_device(), sizeof(key_type)*src.m_keys.dimension_0());
// A set carries no meaningful values, so its value storage is not copied.
if (!is_set) {
raw_deep_copy(tmp.m_values.ptr_on_device(), src.m_values.ptr_on_device(), sizeof(impl_value_type)*src.m_values.dimension_0());
}
raw_deep_copy(tmp.m_scalars.ptr_on_device(), src.m_scalars.ptr_on_device(), sizeof(int)*num_scalars );
*this = tmp;
}
}
//@}
private: // private member functions
/// Whether the modified flag is set. insert() and erase() raise it; size()
/// clears it after recounting.
bool modified() const
{
return get_flag(modified_idx);
}
void set_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
const int true_ = true;
raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
}
void reset_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
const int false_ = false;
raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
}
bool get_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
int result = false;
raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
return result;
}
static uint32_t calculate_capacity(uint32_t capacity_hint)
{
  // A zero hint still yields one block of 128 entries.
  if (capacity_hint == 0u) {
    return 128u;
  }
  // Pad the hint by one sixth (about 16.7%); the product is formed in 64 bits
  // and then narrowed back to 32 bits.
  const uint32_t padded = static_cast<uint32_t>(7ull * capacity_hint / 6u);
  // Round up to the next multiple of 128.
  const uint32_t blocks = (padded + 127u) / 128u;
  return blocks * 128u;
}
private: // private members
// When true, insert() caps its unsuccessful free-entry searches at 32.
bool m_bounded_insert;
hasher_type m_hasher;
equal_to_type m_equal_to;
// Entry count cached by size(); mutable because size() is const.
mutable size_type m_size;
// One bit per entry slot; a set bit marks the slot as in use (see valid_at()).
bitset_type m_available_indexes;
// Per bucket: index of the first entry in its list, or invalid_index.
size_type_view m_hash_lists;
// Per entry: index of the next entry in the same list, or invalid_index.
size_type_view m_next_index;
key_type_view m_keys;
// Holds a single placeholder element when the map is a set.
value_type_view m_values;
// Status flags indexed by modified_idx, erasable_idx and failed_insert_idx.
scalars_view m_scalars;
// Other UnorderedMap instantiations and the Impl helpers access the members directly.
template <typename KKey, typename VValue, typename DDevice, typename HHash, typename EEqualTo>
friend class UnorderedMap;
template <typename UMap>
friend struct Impl::UnorderedMapErase;
template <typename UMap>
friend struct Impl::UnorderedMapHistogram;
template <typename UMap>
friend struct Impl::UnorderedMapPrint;
};
// Specialization of deep_copy for two UnorderedMap objects.
// The maps may differ in key/value const-ness and in device, but must share
// the Hasher and EqualTo types. The work is delegated to
// dst.create_copy_view(src).
template < typename DKey, typename DT, typename DDevice
, typename SKey, typename ST, typename SDevice
, typename Hasher, typename EqualTo >
inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> & dst
, const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> & src )
{
dst.create_copy_view(src);
}
} // namespace Kokkos
#endif //KOKKOS_UNORDERED_MAP_HPP

View File

@ -1,287 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VECTOR_HPP
#define KOKKOS_VECTOR_HPP
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_DualView.hpp>
/* Drop in replacement for std::vector based on Kokkos::DualView
* Most functions only work on the host (it will not compile if called from device kernel)
*
*/
namespace Kokkos {
template <typename Scalar, class Space = Kokkos::DefaultExecutionSpace >
class vector : public DualView<Scalar*,LayoutLeft,Space> {
public:
typedef typename Space::memory_space memory_space;
typedef typename Space::execution_space execution_space;
typedef typename Kokkos::Device<execution_space,memory_space> device_type;
typedef Scalar value_type;
typedef Scalar* pointer;
typedef const Scalar* const_pointer;
// reference / const_reference must be reference types, as in std::vector and
// as returned by operator[] / operator() below. They were previously declared
// as pointer types, which made them duplicates of pointer / const_pointer.
typedef Scalar& reference;
typedef const Scalar& const_reference;
// Iterators are plain pointers into the host view.
typedef Scalar* iterator;
typedef const Scalar* const_iterator;
private:
size_t _size;
typedef size_t size_type;
float _extra_storage;
typedef DualView<Scalar*,LayoutLeft,Space> DV;
public:
// Element access. Both operators index the host view (DV::h_view) without any
// bounds check against _size. With KOKKOS_CUDA_USE_UVM defined they are
// additionally marked KOKKOS_INLINE_FUNCTION.
// NOTE(review): presumably so they can be called from device code when host
// memory is device-accessible -- confirm.
#ifdef KOKKOS_CUDA_USE_UVM
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
#else
inline Scalar& operator() (int i) const {return DV::h_view(i);};
inline Scalar& operator[] (int i) const {return DV::h_view(i);};
#endif
/* Member functions which behave like std::vector functions */
/* Empty vector: no elements, 10% over-allocation factor, host side marked
 * as modified. */
vector()
  : DV()
  , _size(0)
  , _extra_storage(1.1)
{
  DV::modified_host() = 1;
}
/* Vector of n elements, each set to val. Storage for n*1.1 elements is
 * allocated up front and the host side is marked as modified before the
 * fill is delegated to assign(). */
vector(int n, Scalar val=Scalar())
  : DualView<Scalar*,LayoutLeft,Space>("Vector",size_t(n*(1.1)))
  , _size(n)
  , _extra_storage(1.1)
{
  DV::modified_host() = 1;
  assign(n,val);
}
/* Set the element count to n. The storage is re-allocated (to
 * n * _extra_storage elements) whenever n reaches or exceeds the current
 * capacity. */
void resize(size_t n) {
  const bool out_of_room = (n >= capacity());
  if (out_of_room) {
    DV::resize(size_t (n*_extra_storage));
  }
  _size = n;
}
/* Resize to n elements and fill with val.
 * This simply forwards to assign(), so ALL n elements are overwritten with
 * val -- unlike std::vector::resize(n, val), which only initializes newly
 * added elements. */
void resize(size_t n, const Scalar& val) {
assign(n,val);
}
/* Make the vector hold n copies of val. */
void assign (size_t n, const Scalar& val) {
  /* Grow the storage first when n does not fit (behaviour of std::vector) */
  if (capacity() < n) {
    DV::resize(size_t (n*_extra_storage));
  }
  _size = n;
  /* Fill on the side whose modification counter is ahead; ties go to the host */
  const bool fill_on_host = DV::modified_host() >= DV::modified_device();
  if (!fill_on_host) {
    set_functor device_fill(DV::d_view,val);
    parallel_for(n,device_fill);
    DV::t_dev::execution_space::fence();
    DV::modified_device()++;
    return;
  }
  set_functor_host host_fill(DV::h_view,val);
  parallel_for(n,host_fill);
  DV::t_host::execution_space::fence();
  DV::modified_host()++;
}
/* Ensure storage for at least n elements (plus the over-allocation factor).
 * Like std::vector::reserve this never shrinks the storage: previously the
 * view was resized unconditionally, so reserving less than the current
 * capacity could cut the allocation below _size while _size stayed unchanged. */
void reserve(size_t n) {
  const size_t wanted = size_t (n*_extra_storage);
  if (wanted > capacity()) {
    DV::resize(wanted);
  }
}
/* Append val on the host side. When the storage is full it is grown by the
 * over-allocation factor, and by at least one element. The host modification
 * counter is bumped on every call. */
void push_back(Scalar val) {
  DV::modified_host()++;
  if (capacity() == _size) {
    size_t grown = _size*_extra_storage;
    if (grown == _size) {
      grown++;
    }
    DV::resize(grown);
  }
  DV::h_view(_size) = val;
  _size++;
}
/* Drop the last element by decrementing the count; storage is untouched.
 * There is no check for an empty vector (_size is unsigned and would wrap). */
void pop_back() {
_size--;
};
/* Forget all elements; the allocation is kept. */
void clear() {
_size = 0;
}
/* Number of elements currently held. */
size_type size() const {return _size;};
/* Fixed upper bound reported to callers (hard-coded constant). */
size_type max_size() const {return 2000000000;}
/* Capacity as reported by the underlying DualView. */
size_type capacity() const {return DV::capacity();};
bool empty() const {return _size==0;};
/* Iterators are raw pointers into the host view; end() points at element _size. */
iterator begin() const {return &DV::h_view(0);};
iterator end() const {return &DV::h_view(_size);};
/* std::algorithms which originally work with iterators; here they are implemented as member functions */
/* Binary search on the host data between index start and
 * min(theEnd, _size-1). Returns theEnd when that range is empty; otherwise
 * returns the index the search converges on.
 * NOTE(review): assumes the host data in that range is sorted ascending --
 * confirm with callers. */
size_t
lower_bound (const size_t& start,
const size_t& theEnd,
const Scalar& comp_val) const
{
int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion
int upper = _size > theEnd? theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion
if (upper <= lower) {
return theEnd;
}
Scalar lower_val = DV::h_view(lower);
Scalar upper_val = DV::h_view(upper);
size_t idx = (upper+lower)/2;
Scalar val = DV::h_view(idx);
// NOTE(review): these two early exits compare the midpoint value against the
// values at the range ends, not against comp_val. For sorted data neither
// condition can hold -- confirm whether comp_val was intended here.
if(val>upper_val) return upper;
if(val<lower_val) return start;
// Standard halving: move lower past idx when comp_val is greater than the
// probed value, otherwise pull upper down to idx.
while(upper>lower) {
if(comp_val>val) {
lower = ++idx;
} else {
upper = idx;
}
idx = (upper+lower)/2;
val = DV::h_view(idx);
}
return idx;
}
/* True when the host data is in non-decreasing order. Vectors with fewer
 * than two elements are trivially sorted.
 * The loop runs over i in [1, _size) so that nothing is subtracted from the
 * unsigned _size: the previous bound `_size-1` wrapped around for an empty
 * vector and made the loop read far past the end of the view. */
bool is_sorted() {
  for (size_t i = 1; i < _size; ++i) {
    if (DV::h_view(i-1) > DV::h_view(i)) return false;
  }
  return true;
}
/* Binary search for val in the host data. Returns a pointer to a matching
 * element, or end() when the vector is empty, val lies outside
 * [first element, last element], or no element equals val. */
iterator find(Scalar val) const {
  if (_size == 0) {
    return end();
  }
  const bool below_first = val < DV::h_view(0);
  if (below_first || (val > DV::h_view(_size-1))) {
    return end();
  }
  int lo = 0;
  int hi = _size-1;
  int probe = _size/2;
  while (hi > lo) {
    if (val > DV::h_view(probe)) {
      lo = probe+1;
    } else {
      hi = probe;
    }
    probe = (hi+lo)/2;
  }
  if (val == DV::h_view(probe)) {
    return &DV::h_view(probe);
  }
  return end();
}
/* Additional functions for data management */
/* Copy the device view into the host view (no modification counters are touched). */
void device_to_host(){
deep_copy(DV::h_view,DV::d_view);
}
/* Copy the host view into the device view (no modification counters are touched). */
void host_to_device() const {
deep_copy(DV::d_view,DV::h_view);
}
/* Mark the host copy as the most recent one by moving its counter one past
 * the device counter; no data is copied. */
void on_host() {
DV::modified_host() = DV::modified_device() + 1;
}
/* Mark the device copy as the most recent one by moving its counter one past
 * the host counter; no data is copied. */
void on_device() {
DV::modified_device() = DV::modified_host() + 1;
}
/* Set the growth factor to 1 + extra (e.g. extra = 0.1 over-allocates by 10%). */
void set_overallocation(float extra) {
_extra_storage = 1.0 + extra;
}
public:
/* Functor that stores _val into element i of a device view; used by assign()
 * with parallel_for. */
struct set_functor {
typedef typename DV::t_dev::execution_space execution_space;
typename DV::t_dev _data;
Scalar _val;
set_functor(typename DV::t_dev data, Scalar val) :
_data(data),_val(val) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
_data(i) = _val;
}
};
/* Functor that stores _val into element i of a host view; used by assign()
 * with parallel_for. */
struct set_functor_host {
typedef typename DV::t_host::execution_space execution_space;
typename DV::t_host _data;
Scalar _val;
set_functor_host(typename DV::t_host data, Scalar val) :
_data(data),_val(val) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
_data(i) = _val;
}
};
};
}
#endif

View File

@ -1,173 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITSET_IMPL_HPP
#define KOKKOS_BITSET_IMPL_HPP
#include <Kokkos_Macros.hpp>
#include <stdint.h>
#include <cstdio>
#include <climits>
#include <iostream>
#include <iomanip>
namespace Kokkos { namespace Impl {
KOKKOS_FORCEINLINE_FUNCTION
// Rotates the bits of i to the right by r positions; r == 0 returns i
// unchanged (this also avoids a shift by the full width of unsigned).
unsigned rotate_right(unsigned i, int r)
{
  enum { size = static_cast<int>(sizeof(unsigned)*CHAR_BIT) };
  if (r == 0) {
    return i;
  }
  const unsigned low_part  = i >> r;
  const unsigned high_part = i << (size - r);
  return low_part | high_part;
}
KOKKOS_FORCEINLINE_FUNCTION
// Zero-based position of the least-significant set bit of i.
// On the __ffs / __builtin_ffs paths a zero argument yields -1, and the
// portable fallback now does the same.
// NOTE(review): what _bit_scan_forward returns for zero is not established
// here -- confirm before relying on it.
int bit_scan_forward(unsigned i)
{
#if defined( __CUDA_ARCH__ )
  return __ffs(i) - 1;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(i) - 1;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(i);
#else
  // Portable fallback. The previous loop condition `i & t == 0` parsed as
  // `i & (t == 0)`, which is always zero, so the loop never ran and the
  // function returned 0 for every input.
  if (i == 0u) {
    return -1;
  }
  int r = 0;
  while ((i & 1u) == 0u) {
    i >>= 1;
    ++r;
  }
  return r;
#endif
}
KOKKOS_FORCEINLINE_FUNCTION
// Zero-based position of the most-significant set bit of i.
// The result for i == 0 is not defined on the intrinsic paths
// (__builtin_clz(0) is undefined); the portable fallback returns -1.
int bit_scan_reverse(unsigned i)
{
  enum { shift = static_cast<int>(sizeof(unsigned)*CHAR_BIT - 1) };
#if defined( __CUDA_ARCH__ )
  return shift - __clz(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return shift - __builtin_clz(i);
#elif defined( __INTEL_COMPILER )
  return _bit_scan_reverse(i);
#else
  // Portable fallback. Two defects are fixed here: the loop condition
  // `i & t == 0` parsed as `i & (t == 0)` and therefore never iterated, and
  // the counter ran upwards from the top bit, which would have produced the
  // number of leading zeros rather than a bit position.
  if (i == 0u) {
    return -1;
  }
  unsigned t = 1u << shift;
  int r = shift;
  while ((i & t) == 0u) {
    t >>= 1;
    --r;
  }
  return r;
#endif
}
// count the bits set
KOKKOS_FORCEINLINE_FUNCTION
// Number of bits set in i, using the compiler intrinsic where one is known
// and a branch-free bit-twiddling fallback otherwise.
int popcount(unsigned i)
{
#if defined( __CUDA_ARCH__ )
  return __popc(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_popcount(i);
#elif defined ( __INTEL_COMPILER )
  return _popcnt32(i);
#else
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive
  const unsigned every_other_bit = ~0u/3u;       // 0101...
  const unsigned every_other_pair = ~0u/15u*3u;  // 0011...
  const unsigned low_nibbles = ~0u/255u*15u;     // 00001111...
  const unsigned byte_ones = ~0u/255u;           // 00000001...
  i -= (i >> 1) & every_other_bit;                            // 2-bit sums
  i = (i & every_other_pair) + ((i >> 2) & every_other_pair); // 4-bit sums
  i = (i + (i >> 4)) & low_nibbles;                           // 8-bit sums
  // The multiply accumulates all byte sums into the top byte.
  return (int)((i * byte_ones) >> ((sizeof(unsigned) - 1) * CHAR_BIT));
#endif
}
// Reduction functor that sums popcount() over every block of a bitset.
//
// apply() launches a parallel_reduce over m_blocks.dimension_0() work items;
// init/join/operator() supply the reduction's identity, combine and
// per-block contribution respectively.
template <typename Bitset>
struct BitsetCount
{
  typedef Bitset                                                  bitset_type;
  typedef typename bitset_type::execution_space::execution_space  execution_space;
  typedef typename bitset_type::size_type                         size_type;
  typedef size_type                                               value_type;

  bitset_type m_bitset;

  BitsetCount( bitset_type const& bitset)
    : m_bitset(bitset)
  {}

  // Run the reduction and return the accumulated bit count.
  size_type apply() const
  {
    size_type total = 0u;
    parallel_reduce(m_bitset.m_blocks.dimension_0(), *this, total);
    return total;
  }

  // Identity element of the reduction.
  KOKKOS_INLINE_FUNCTION
  static void init( value_type & total)
  {
    total = 0u;
  }

  // Combine two partial counts.
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & total, const volatile size_type & partial )
  {
    total += partial;
  }

  // Contribution of block `block_index`: the number of bits set in it.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type block_index, value_type & total) const
  {
    total += popcount(m_bitset.m_blocks[block_index]);
  }
};
}} //Kokkos::Impl
#endif // KOKKOS_BITSET_IMPL_HPP

View File

@ -1,195 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
#define KOKKOS_FUNCTIONAL_IMPL_HPP
#include <Kokkos_Macros.hpp>
#include <stdint.h>
namespace Kokkos { namespace Impl {
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Assemble the i-th 32-bit word of the byte stream `p`, least significant
// byte first.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t getblock32 ( const uint8_t * p, int i )
{
  // The word is built byte by byte instead of through a uint32_t pointer;
  // this avoids aliasing problems that could cause errors with forced
  // inlining.
  const uint8_t * const word = p + i*4;
  uint32_t value = static_cast<uint32_t>(word[0]);
  value |= static_cast<uint32_t>(word[1]) << 8;
  value |= static_cast<uint32_t>(word[2]) << 16;
  value |= static_cast<uint32_t>(word[3]) << 24;
  return value;
}

// Rotate `x` left by `r` bits. The hash below only calls this with 13 and 15;
// a count of 0 is not guarded against.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const uint32_t wrapped = x >> (32 - r);
  const uint32_t shifted = x << r;
  return shifted | wrapped;
}

// MurmurHash3 finalization mix: three xor-shifts interleaved with two
// multiplications.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t fmix32 ( uint32_t h )
{
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}

// 32-bit MurmurHash3 (x86 variant) of the `len` bytes starting at `key`,
// seeded with `seed`.
//
// The input is consumed as whole 4-byte blocks followed by a tail of up to
// three bytes; the length is then folded in and the state is finalized with
// fmix32().
KOKKOS_INLINE_FUNCTION
uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
{
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint8_t * const bytes = static_cast<const uint8_t*>(key);
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  //----------
  // body: one mixing round per complete 4-byte block

  for (int block = 0; block < nblocks; ++block)
  {
    uint32_t k1 = getblock32(bytes, block);

    k1 *= c1;
    k1 = rotl32(k1,15);
    k1 *= c2;

    h1 ^= k1;
    h1 = rotl32(h1,13);
    h1 = h1*5+0xe6546b64;
  }

  //----------
  // tail: the 1-3 bytes left after the last complete block, if any

  const uint8_t * const tail = bytes + nblocks*4;
  const int remaining = len & 3;

  if (remaining != 0)
  {
    uint32_t k1 = 0;
    // Byte b of the tail lands in bits [8b, 8b+8) of k1.
    for (int b = remaining - 1; b >= 0; --b) {
      k1 ^= tail[b] << (8*b);
    }
    k1 *= c1;
    k1 = rotl32(k1,15);
    k1 *= c2;
    h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len;
  return fmix32(h1);
}
// KOKKOS_MAY_ALIAS marks the integer typedefs below as allowed to alias other
// types on compilers that understand the attribute; elsewhere it expands to
// nothing. It is local to this helper and undefined again afterwards.
#if defined( __GNUC__ ) || defined( __GNUG__ ) || defined( __clang__ )
  #define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#else
  #define KOKKOS_MAY_ALIAS
#endif

// Compare the object representations of `*a_ptr` and `*b_ptr`.
//
// sizeof(T) bytes are compared in the widest chunks available: as many
// 64-bit words as fit, then at most one 32-bit, one 16-bit and one 8-bit
// remainder. Returns true only if every chunk matches.
//
// NOTE(review): the wide reads are performed at T's own alignment; confirm
// this is acceptable on every target if T is less aligned than uint64_t.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS T64;
  typedef uint32_t KOKKOS_MAY_ALIAS T32;
  typedef uint16_t KOKKOS_MAY_ALIAS T16;
  typedef uint8_t  KOKKOS_MAY_ALIAS T8;

  // Number of whole chunks of each width that fit in a T.
  enum {
    NUM_8  = sizeof(T),
    NUM_16 = NUM_8 / 2,
    NUM_32 = NUM_8 / 4,
    NUM_64 = NUM_8 / 8
  };

  // View each operand through every chunk width.
  union {
    T   const * const ptr;
    T64 const * const ptr64;
    T32 const * const ptr32;
    T16 const * const ptr16;
    T8  const * const ptr8;
  } a = {a_ptr}, b = {b_ptr};

  // All complete 64-bit words.
  for (int i=0; i < NUM_64; ++i) {
    if (a.ptr64[i] != b.ptr64[i]) {
      return false;
    }
  }

  // One trailing 32-bit word, if the 64-bit words left one uncovered.
  if ( (NUM_64*2 < NUM_32) && (a.ptr32[NUM_64*2] != b.ptr32[NUM_64*2]) ) {
    return false;
  }

  // One trailing 16-bit word.
  if ( (NUM_32*2 < NUM_16) && (a.ptr16[NUM_32*2] != b.ptr16[NUM_32*2]) ) {
    return false;
  }

  // One trailing byte.
  if ( (NUM_16*2 < NUM_8) && (a.ptr8[NUM_16*2] != b.ptr8[NUM_16*2]) ) {
    return false;
  }

  return true;
}

#undef KOKKOS_MAY_ALIAS
}} // namespace Kokkos::Impl
#endif //KOKKOS_FUNCTIONAL_IMPL_HPP

View File

@ -1,208 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// Host-space overload of create_mirror_view for StaticCrsGraph.
// The defaulted enable_if argument keeps this overload viable only when
// ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace is true.
// The graph itself is returned (converted to its HostMirror type); no
// create_mirror or deep_copy call is made here.
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
{
return view ;
}
// Build a HostMirror of `view` and copy both of its arrays into it.
//
// The row map is mirrored through a separate, writable handle because the
// mirror's own row_map member is assigned as 'const' from 'non-const'; the
// deep copy therefore targets the writable handle.
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view )
{
  typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >  graph_type ;
  typedef typename graph_type::HostMirror                              mirror_type ;
  typedef typename graph_type::row_map_type::HostMirror                row_map_mirror_type ;

  mirror_type mirror ;

  // Allocation to match:
  row_map_mirror_type writable_row_map = create_mirror( view.row_map );
  mirror.row_map = writable_row_map ;
  mirror.entries = create_mirror( view.entries );

  // Deep copy:
  deep_copy( writable_row_map , view.row_map );
  deep_copy( mirror.entries , view.entries );

  return mirror ;
}
// Non-host overload of create_mirror_view for StaticCrsGraph.
// The defaulted enable_if argument keeps this overload viable only when
// ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace is false.
// It forwards to create_mirror( view ) and returns that result.
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
{
return create_mirror( view );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// Create a StaticCrsGraph whose row i has input[i] entries.
//
// The row map is filled on a host mirror with the running sum of `input`
// (row_map[0] == 0, row_map[i+1] == input[0] + ... + input[i]) and then
// copied into a work view named "tmp". The entries view is allocated with
// `label` and the total count; this overload does not write into it.
template< class StaticCrsGraphType , class InputSizeType >
inline
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
                       const std::vector< InputSizeType > & input )
{
  typedef StaticCrsGraphType                  output_type ;
  typedef typename output_type::entries_type  entries_type ;

  typedef View< typename output_type::size_type [] ,
                typename output_type::array_layout ,
                typename output_type::execution_space > work_type ;

  output_type output ;

  // Create the row map:

  const size_t length = input.size();

  {
    work_type row_work( "tmp" , length + 1 );

    typename work_type::HostMirror row_work_host =
      create_mirror_view( row_work );

    size_t sum = 0 ;
    row_work_host[0] = 0 ;
    for ( size_t row = 0 ; row < length ; ++row ) {
      sum += input[row];
      row_work_host[row+1] = sum ;
    }

    deep_copy( row_work , row_work_host );

    output.entries = entries_type( label , sum );
    output.row_map = row_work ;
  }

  return output ;
}
//----------------------------------------------------------------------------
// Create a StaticCrsGraph from a vector of rows.
//
// Row i of the result holds the values of input[i], in order. The row map is
// the running sum of the row sizes (row_map[0] == 0); the entries are the
// rows concatenated. Both arrays are filled on host mirrors and then copied
// into the graph's views.
template< class StaticCrsGraphType , class InputSizeType >
inline
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
                       const std::vector< std::vector< InputSizeType > > & input )
{
  typedef StaticCrsGraphType                  output_type ;
  typedef typename output_type::entries_type  entries_type ;

  static_assert( entries_type::rank == 1
               , "Graph entries view must be rank one" );

  typedef View< typename output_type::size_type [] ,
                typename output_type::array_layout ,
                typename output_type::execution_space > work_type ;

  output_type output ;

  // Create the row map:

  const size_t length = input.size();

  {
    work_type row_work( "tmp" , length + 1 );

    typename work_type::HostMirror row_work_host =
      create_mirror_view( row_work );

    size_t sum = 0 ;
    row_work_host[0] = 0 ;
    for ( size_t row = 0 ; row < length ; ++row ) {
      sum += input[row].size();
      row_work_host[row+1] = sum ;
    }

    deep_copy( row_work , row_work_host );

    output.entries = entries_type( label , sum );
    output.row_map = row_work ;
  }

  // Fill in the entries:
  {
    typename entries_type::HostMirror host_entries =
      create_mirror_view( output.entries );

    // `offset` is the position in the flattened entries array.
    size_t offset = 0 ;
    for ( size_t row = 0 ; row < length ; ++row ) {
      for ( size_t col = 0 ; col < input[row].size() ; ++col ) {
        host_entries( offset ) = input[row][col] ;
        ++offset ;
      }
    }

    deep_copy( output.entries , host_entries );
  }

  return output ;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP */

View File

@ -1,101 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_UnorderedMap.hpp>
namespace Kokkos { namespace Impl {
// Pick a hash-table size for a requested capacity.
//
// Returns 0 for a request of 0. Otherwise returns the smallest entry of the
// prime table below that is >= `size`, or the largest entry when `size`
// exceeds every prime in the table.
uint32_t find_hash_size(uint32_t size)
{
  if (size == 0u) return 0u;

  // these primes try to preserve randomness of hash
  static const uint32_t primes [] = {
        3, 7, 13, 23, 53, 97, 193, 389, 769, 1543
      , 2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539
      , 3727, 3911, 4441 , 4787 , 5119 , 5471 , 5801 , 6143 , 6521 , 6827
      , 7177 , 7517 , 7853 , 8887 , 9587 , 10243 , 10937 , 11617 , 12289
      , 12967 , 13649 , 14341 , 15013 , 15727
      , 17749 , 19121 , 20479 , 21859 , 23209 , 24593 , 25939 , 27329
      , 28669 , 30047 , 31469 , 35507 , 38231 , 40961 , 43711 , 46439
      , 49157 , 51893 , 54617 , 57347 , 60077 , 62801 , 70583 , 75619
      , 80669 , 85703 , 90749 , 95783 , 100823 , 105871 , 110909 , 115963
      , 120997 , 126031 , 141157 , 151237 , 161323 , 171401 , 181499 , 191579
      , 201653 , 211741 , 221813 , 231893 , 241979 , 252079
      , 282311 , 302483 , 322649 , 342803 , 362969 , 383143 , 403301 , 423457
      , 443629 , 463787 , 483953 , 504121 , 564617 , 604949 , 645313 , 685609
      , 725939 , 766273 , 806609 , 846931 , 887261 , 927587 , 967919 , 1008239
      , 1123477 , 1198397 , 1273289 , 1348177 , 1423067 , 1497983 , 1572869
      , 1647761 , 1722667 , 1797581 , 1872461 , 1947359 , 2022253
      , 2246953 , 2396759 , 2546543 , 2696363 , 2846161 , 2995973 , 3145739
      , 3295541 , 3445357 , 3595117 , 3744941 , 3894707 , 4044503
      , 4493921 , 4793501 , 5093089 , 5392679 , 5692279 , 5991883 , 6291469
      , 6591059 , 6890641 , 7190243 , 7489829 , 7789447 , 8089033
      , 8987807 , 9586981 , 10186177 , 10785371 , 11384539 , 11983729
      , 12582917 , 13182109 , 13781291 , 14380469 , 14979667 , 15578861
      , 16178053 , 17895707 , 19014187 , 20132683 , 21251141 , 22369661
      , 23488103 , 24606583 , 25725083 , 26843549 , 27962027 , 29080529
      , 30198989 , 31317469 , 32435981 , 35791397 , 38028379 , 40265327
      , 42502283 , 44739259 , 46976221 , 49213237 , 51450131 , 53687099
      , 55924061 , 58161041 , 60397993 , 62634959 , 64871921
      , 71582857 , 76056727 , 80530643 , 85004567 , 89478503 , 93952427
      , 98426347 , 102900263 , 107374217 , 111848111 , 116322053 , 120795971
      , 125269877 , 129743807 , 143165587 , 152113427 , 161061283 , 170009141
      , 178956983 , 187904819 , 196852693 , 205800547 , 214748383 , 223696237
      , 232644089 , 241591943 , 250539763 , 259487603 , 268435399
  };

  const uint32_t num_primes = sizeof(primes)/sizeof(uint32_t);
  const uint32_t * const largest = primes + (num_primes - 1);

  // Walk forward until a prime is large enough; stop on the final entry
  // regardless, so it serves as the answer for oversized requests.
  const uint32_t * candidate = primes;
  while (candidate != largest && *candidate < size) {
    ++candidate;
  }
  return *candidate;
}
}} // namespace Kokkos::Impl

View File

@ -1,297 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
#define KOKKOS_UNORDERED_MAP_IMPL_HPP
#include <Kokkos_Core_fwd.hpp>
#include <stdint.h>
#include <cstdio>
#include <climits>
#include <iostream>
#include <iomanip>
namespace Kokkos { namespace Impl {
uint32_t find_hash_size( uint32_t size );
// Functor that re-inserts every valid entry of a source map into a
// destination map. apply() launches one work item per slot of the source
// (m_src.capacity()).
template <typename Map>
struct UnorderedMapRehash
{
  typedef Map                                  map_type;
  typedef typename map_type::const_map_type    const_map_type;
  typedef typename map_type::execution_space   execution_space;
  typedef typename map_type::size_type         size_type;

  map_type       m_dst;
  const_map_type m_src;

  UnorderedMapRehash( map_type const& dst, const_map_type const& src)
    : m_dst(dst), m_src(src)
  {}

  // Visit every slot of the source map in parallel.
  void apply() const
  {
    parallel_for(m_src.capacity(), *this);
  }

  // Copy slot `slot` into the destination, skipping slots that are not valid.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type slot) const
  {
    if ( !m_src.valid_at(slot) ) {
      return;
    }
    m_dst.insert(m_src.key_at(slot), m_src.value_at(slot));
  }
};
// Functor that unlinks entries for which valid_at() is false from the hash
// lists of an unordered map.
// apply() launches one work item per hash list (m_hash_lists.dimension_0()).
// Work item i walks the list headed by m_hash_lists(i) through m_next_index,
// and for every entry it removes it resets the next-index and key slots.
template <typename UMap>
struct UnorderedMapErase
{
typedef UMap map_type;
typedef typename map_type::execution_space execution_space;
typedef typename map_type::size_type size_type;
typedef typename map_type::key_type key_type;
typedef typename map_type::impl_value_type value_type;
map_type m_map;
UnorderedMapErase( map_type const& map)
: m_map(map)
{}
// Process every hash list in parallel.
void apply() const
{
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
}
// Clean up hash list i in two phases: first pop removed entries off the
// head, then splice removed entries out of the remainder of the list.
KOKKOS_INLINE_FUNCTION
void operator()( size_type i ) const
{
const size_type invalid_index = map_type::invalid_index;
size_type curr = m_map.m_hash_lists(i);
size_type next = invalid_index;
// remove erased head of the linked-list
while (curr != invalid_index && !m_map.valid_at(curr)) {
next = m_map.m_next_index[curr];
m_map.m_next_index[curr] = invalid_index;
m_map.m_keys[curr] = key_type();
// NOTE(review): is_set is read through the instance here and through
// map_type in the second loop; the latter shows it is a static member, so
// both read the same constant. Whether the value slot should be reset when
// is_set is true (rather than false) cannot be determined from this file --
// confirm against the UnorderedMap definition.
if (m_map.is_set) m_map.m_values[curr] = value_type();
curr = next;
// the list head now points past the entry just removed
m_map.m_hash_lists(i) = next;
}
// if the list is non-empty and the head is valid
if (curr != invalid_index && m_map.valid_at(curr) ) {
// prev always names the last entry known to stay in the list
size_type prev = curr;
curr = m_map.m_next_index[prev];
while (curr != invalid_index) {
// read the successor before curr's link is possibly overwritten below
next = m_map.m_next_index[curr];
if (m_map.valid_at(curr)) {
prev = curr;
}
else {
// remove curr from list
m_map.m_next_index[prev] = next;
m_map.m_next_index[curr] = invalid_index;
m_map.m_keys[curr] = key_type();
if (map_type::is_set) m_map.m_values[curr] = value_type();
}
curr = next;
}
}
}
};
// Collects three 100-bin histograms over the hash lists of an unordered map:
//   m_length         - number of entries in a list
//   m_distance       - spread (max index - min index) of a list's entries
//   m_block_distance - the same spread measured in blocks of 32 indices
// Values of 100 or more are clamped into the last bin (99). Empty lists are
// not recorded.
template <typename UMap>
struct UnorderedMapHistogram
{
  typedef UMap                                  map_type;
  typedef typename map_type::execution_space    execution_space;
  typedef typename map_type::size_type          size_type;

  typedef View<int[100], execution_space>       histogram_view;
  typedef typename histogram_view::HostMirror   host_histogram_view;

  map_type       m_map;
  histogram_view m_length;
  histogram_view m_distance;
  histogram_view m_block_distance;

  UnorderedMapHistogram( map_type const& map)
    : m_map(map)
    , m_length("UnorderedMap Histogram")
    , m_distance("UnorderedMap Histogram")
    , m_block_distance("UnorderedMap Histogram")
  {}

  // Accumulate the histograms, one work item per hash list.
  void calculate()
  {
    parallel_for(m_map.m_hash_lists.dimension_0(), *this);
  }

  // Reset all three histograms to zero.
  void clear()
  {
    Kokkos::deep_copy(m_length, 0);
    Kokkos::deep_copy(m_distance, 0);
    Kokkos::deep_copy(m_block_distance, 0);
  }

  // Write the list-length histogram to `out`.
  void print_length(std::ostream &out)
  {
    print_histogram(out, m_length);
  }

  // Write the index-spread histogram to `out`.
  void print_distance(std::ostream &out)
  {
    print_histogram(out, m_distance);
  }

  // Write the block-spread histogram to `out`.
  void print_block_distance(std::ostream &out)
  {
    print_histogram(out, m_block_distance);
  }

  // Classify hash list i and bump the matching bin of each histogram.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    const size_type invalid_index = map_type::invalid_index;

    uint32_t  length    = 0;
    size_type min_index = ~0u;
    size_type max_index = 0;

    size_type curr = m_map.m_hash_lists(i);
    while (curr != invalid_index) {
      ++length;
      if (curr < min_index) min_index = curr;
      if (max_index < curr) max_index = curr;
      curr = m_map.m_next_index[curr];
    }

    // Nothing to record for an empty list.
    if (length == 0u) {
      return;
    }

    size_type distance = max_index - min_index;
    size_type blocks   = max_index/32u - min_index/32u;

    // normalize data
    if (!(length   < 100u)) length   = 99u;
    if (!(distance < 100u)) distance = 99u;
    if (!(blocks   < 100u)) blocks   = 99u;

    atomic_fetch_add( &m_length(length), 1);
    atomic_fetch_add( &m_distance(distance), 1);
    atomic_fetch_add( &m_block_distance(blocks), 1);
  }

private:

  // Copy `histogram` to a host mirror and stream its bins, each followed by
  // " , "; three backspaces and a space are emitted after the last bin.
  static void print_histogram(std::ostream &out, histogram_view const& histogram)
  {
    host_histogram_view host_copy = create_mirror_view(histogram);
    Kokkos::deep_copy(host_copy, histogram);

    const int size = host_copy.dimension_0();
    for (int bin = 0; bin < size; ++bin)
    {
      out << host_copy[bin] << " , ";
    }
    out << "\b\b\b   " << std::endl;
  }
};
// Debug functor that printf()s every entry of every hash list of a map.
// apply() launches one work item per hash list.
template <typename UMap>
struct UnorderedMapPrint
{
  typedef UMap                                 map_type;
  typedef typename map_type::execution_space   execution_space;
  typedef typename map_type::size_type         size_type;

  map_type m_map;

  UnorderedMapPrint( map_type const& map)
    : m_map(map)
  {}

  // Print all hash lists in parallel.
  void apply()
  {
    parallel_for(m_map.m_hash_lists.dimension_0(), *this);
  }

  // Print hash list i, one line per entry: "<head>[<position>]: <key>-><value>".
  // NOTE(review): every field is formatted with %d; confirm this matches the
  // key and value types of the maps this is used with.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    const size_type invalid_index = map_type::invalid_index;

    uint32_t list = m_map.m_hash_lists(i);

    size_type curr = list;
    size_type ii   = 0;
    while (curr != invalid_index) {
      printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), m_map.value_at(curr));
      curr = m_map.m_next_index[curr];
      ++ii;
    }
  }
};
// Compile-time predicate over destination (DKey, DValue) and source
// (SKey, SValue) key/value types. The primary template derives from false_;
// each specialization below derives from true_ for one permitted combination.
template <typename DKey, typename DValue, typename SKey, typename SValue>
struct UnorderedMapCanAssign : public false_ {};
// identical key and value types
template <typename Key, typename Value>
struct UnorderedMapCanAssign<Key,Value,Key,Value> : public true_ {};
// destination adds const to the key
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key,Value,Key,Value> : public true_ {};
// destination adds const to both key and value
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key,const Value,Key,Value> : public true_ {};
// source key already const; destination adds const to the value
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key,const Value,const Key,Value> : public true_ {};
}} //Kokkos::Impl
#endif // KOKKOS_UNORDERED_MAP_IMPL_HPP

View File

@ -1,92 +0,0 @@
# Builds and runs the Kokkos containers unit tests, one executable per
# execution backend that Makefile.kokkos reports as enabled.

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests

# Declared before the include so that it stays the default goal.
default: build_all
	echo "End Build"

# These targets name actions, not files.
.PHONY: default build_all test test-cuda test-threads test-openmp test-serial clean

include $(KOKKOS_PATH)/Makefile.kokkos

# CUDA builds force the nvcc wrapper; otherwise the environment may choose.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = nvcc_wrapper
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests

TEST_TARGETS =
TARGETS =

# One executable and one test target per enabled backend.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_Cuda
  TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_Threads
  TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_OpenMP
  TEST_TARGETS += test-openmp
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_Serial
  TEST_TARGETS += test-serial
endif

KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda

KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads

KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP

KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial

test-cuda: KokkosContainers_UnitTest_Cuda
	./KokkosContainers_UnitTest_Cuda

test-threads: KokkosContainers_UnitTest_Threads
	./KokkosContainers_UnitTest_Threads

test-openmp: KokkosContainers_UnitTest_OpenMP
	./KokkosContainers_UnitTest_OpenMP

test-serial: KokkosContainers_UnitTest_Serial
	./KokkosContainers_UnitTest_Serial

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,285 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_BITSET_HPP
#define KOKKOS_TEST_BITSET_HPP
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
// Reduction functor that sets (Set == true) or resets (Set == false) bits of
// a bitset and counts the calls that report success and whose effect is then
// observed through test().
template <typename Bitset, bool Set>
struct TestBitset
{
  typedef Bitset                                   bitset_type;
  typedef typename bitset_type::execution_space    execution_space;
  typedef uint32_t                                 value_type;

  bitset_type m_bitset;

  TestBitset( bitset_type const& bitset)
    : m_bitset(bitset)
  {}

  // Run size()*collisions work items; indices wrap modulo size(), so each
  // bit is targeted `collisions` times. Returns the reduced count.
  unsigned testit(unsigned collisions)
  {
    execution_space::fence();

    unsigned count = 0;
    Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, count);
    return count;
  }

  KOKKOS_INLINE_FUNCTION
  void init( value_type & v ) const { v = 0; }

  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, const volatile value_type & src ) const
  { dst += src; }

  KOKKOS_INLINE_FUNCTION
  void operator()(uint32_t i, value_type & v) const
  {
    i = i % m_bitset.size();
    if (Set) {
      // count only a successful set() whose bit then reads as set
      if (m_bitset.set(i) && m_bitset.test(i)) {
        ++v;
      }
    }
    else {
      // count only a successful reset() whose bit then reads as clear
      if (m_bitset.reset(i) && !m_bitset.test(i)) {
        ++v;
      }
    }
  }
};
// Reduction functor that counts how many bits of a bitset read as set
// through test().
template <typename Bitset>
struct TestBitsetTest
{
  typedef Bitset                                   bitset_type;
  typedef typename bitset_type::execution_space    execution_space;
  typedef uint32_t                                 value_type;

  bitset_type m_bitset;

  TestBitsetTest( bitset_type const& bitset)
    : m_bitset(bitset)
  {}

  // Run one work item per bit and return the reduced count.
  unsigned testit()
  {
    execution_space::fence();

    unsigned count = 0;
    Kokkos::parallel_reduce( m_bitset.size(), *this, count);
    return count;
  }

  KOKKOS_INLINE_FUNCTION
  void init( value_type & v ) const { v = 0; }

  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, const volatile value_type & src ) const
  { dst += src; }

  KOKKOS_INLINE_FUNCTION
  void operator()(uint32_t i, value_type & v) const
  {
    if (m_bitset.test( i )) {
      ++v;
    }
  }
};
// Reduction functor that exercises find_any_unset_near / find_any_set_near.
// Each work item searches from a hint for a bit it can set (Set == true) or
// reset (Set == false) and counts one success; it gives up once the search
// has reported "nothing found" max_hint() times.
template <typename Bitset, bool Set>
struct TestBitsetAny
{
  typedef Bitset                                   bitset_type;
  typedef typename bitset_type::execution_space    execution_space;
  typedef uint32_t                                 value_type;

  bitset_type m_bitset;

  TestBitsetAny( bitset_type const& bitset)
    : m_bitset(bitset)
  {}

  // Run one work item per bit and return the reduced count.
  unsigned testit()
  {
    execution_space::fence();

    unsigned count = 0;
    Kokkos::parallel_reduce( m_bitset.size(), *this, count);
    return count;
  }

  KOKKOS_INLINE_FUNCTION
  void init( value_type & v ) const { v = 0; }

  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, const volatile value_type & src ) const
  { dst += src; }

  KOKKOS_INLINE_FUNCTION
  void operator()(uint32_t i, value_type & v) const
  {
    bool     result   = false;
    unsigned attempts = 0;
    uint32_t hint     = (i >> 4) << 4;  // i with its low four bits cleared

    while (attempts < m_bitset.max_hint()) {
      // `claimed` is true when a candidate was found and this work item
      // changed it.
      bool claimed = false;
      if (Set) {
        Kokkos::tie(result, hint) = m_bitset.find_any_unset_near(hint, i);
        claimed = result && m_bitset.set(hint);
      }
      else {
        Kokkos::tie(result, hint) = m_bitset.find_any_set_near(hint, i);
        claimed = result && m_bitset.reset(hint);
      }

      if (claimed) {
        ++v;
        break;
      }
      // A found-but-not-claimed candidate is retried without consuming an
      // attempt; only an unsuccessful search counts against the limit.
      if (!result) {
        ++attempts;
      }
    }
  }
};
} // namespace Impl
template <typename Device>
void test_bitset()
{
typedef Kokkos::Bitset< Device > bitset_type;
typedef Kokkos::ConstBitset< Device > const_bitset_type;
//unsigned test_sizes[] = { 0u, 1000u, 1u<<14, 1u<<16, 10000001 };
unsigned test_sizes[] = { 1000u, 1u<<14, 1u<<16, 10000001 };
for (int i=0, end = sizeof(test_sizes)/sizeof(unsigned); i<end; ++i) {
//std::cout << "Bitset " << test_sizes[i] << std::endl;
bitset_type bitset(test_sizes[i]);
//std::cout << " Check inital count " << std::endl;
// nothing should be set
{
Impl::TestBitsetTest< bitset_type > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ(0u, count);
EXPECT_EQ(count, bitset.count());
}
//std::cout << " Check set() " << std::endl;
bitset.set();
// everything should be set
{
Impl::TestBitsetTest< const_bitset_type > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ(bitset.size(), count);
EXPECT_EQ(count, bitset.count());
}
//std::cout << " Check reset() " << std::endl;
bitset.reset();
EXPECT_EQ(0u, bitset.count());
//std::cout << " Check set(i) " << std::endl;
// test setting bits
{
Impl::TestBitset< bitset_type, true > f(bitset);
uint32_t count = f.testit(10u);
EXPECT_EQ( bitset.size(), bitset.count());
EXPECT_EQ( bitset.size(), count );
}
//std::cout << " Check reset(i) " << std::endl;
// test resetting bits
{
Impl::TestBitset< bitset_type, false > f(bitset);
uint32_t count = f.testit(10u);
EXPECT_EQ( bitset.size(), count);
EXPECT_EQ( 0u, bitset.count() );
}
//std::cout << " Check find_any_set(i) " << std::endl;
// test setting any bits
{
Impl::TestBitsetAny< bitset_type, true > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ( bitset.size(), bitset.count());
EXPECT_EQ( bitset.size(), count );
}
//std::cout << " Check find_any_unset(i) " << std::endl;
// test resetting any bits
{
Impl::TestBitsetAny< bitset_type, false > f(bitset);
uint32_t count = f.testit();
EXPECT_EQ( bitset.size(), count);
EXPECT_EQ( 0u, bitset.count() );
}
}
}
} // namespace Test
#endif //KOKKOS_TEST_BITSET_HPP

View File

@ -1,264 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_COMPLEX_HPP
#define KOKKOS_TEST_COMPLEX_HPP
#include <Kokkos_Complex.hpp>
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
template <typename RealType>
void testComplexConstructors () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1;
complex_type z2 (0.0, 0.0);
complex_type z3 (1.0, 0.0);
complex_type z4 (0.0, 1.0);
complex_type z5 (-1.0, -2.0);
ASSERT_TRUE( z1 == z2 );
ASSERT_TRUE( z1 != z3 );
ASSERT_TRUE( z1 != z4 );
ASSERT_TRUE( z1 != z5 );
ASSERT_TRUE( z2 != z3 );
ASSERT_TRUE( z2 != z4 );
ASSERT_TRUE( z2 != z5 );
ASSERT_TRUE( z3 != z4 );
ASSERT_TRUE( z3 != z5 );
complex_type z6 (-1.0, -2.0);
ASSERT_TRUE( z5 == z6 );
// Make sure that complex has value semantics, in particular, that
// equality tests use values and not pointers, so that
// reassignment actually changes the value.
z1 = complex_type (-3.0, -4.0);
ASSERT_TRUE( z1.real () == -3.0 );
ASSERT_TRUE( z1.imag () == -4.0 );
ASSERT_TRUE( z1 != z2 );
complex_type z7 (1.0);
ASSERT_TRUE( z3 == z7 );
ASSERT_TRUE( z7 == 1.0 );
ASSERT_TRUE( z7 != -1.0 );
z7 = complex_type (5.0);
ASSERT_TRUE( z7.real () == 5.0 );
ASSERT_TRUE( z7.imag () == 0.0 );
}
template <typename RealType>
void testPlus () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 + z2;
ASSERT_TRUE( z3 == complex_type (0.0, 0.0) );
}
template <typename RealType>
void testMinus () {
typedef Kokkos::complex<RealType> complex_type;
// Test binary minus.
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 - z2;
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
// Test unary minus.
complex_type z4 (3.0, -4.0);
ASSERT_TRUE( -z1 == complex_type (-3.0, 4.0) );
}
template <typename RealType>
void testTimes () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 - z2;
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
// Test unary minus.
complex_type z4 (3.0, -4.0);
ASSERT_TRUE( z4 == complex_type (3.0, -4.0) );
ASSERT_TRUE( -z4 == complex_type (-3.0, 4.0) );
ASSERT_TRUE( z4 == -complex_type (-3.0, 4.0) );
}
template <typename RealType>
void testDivide () {
typedef Kokkos::complex<RealType> complex_type;
// Test division of a complex number by a real number.
complex_type z1 (1.0, -1.0);
complex_type z2 (1.0 / 2.0, -1.0 / 2.0);
ASSERT_TRUE( z1 / 2.0 == z2 );
// (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i))
// (-1+2i)(1+i) == -3 + i
complex_type z3 (-1.0, 2.0);
complex_type z4 (1.0, -1.0);
complex_type z5 (-3.0, 1.0);
ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 );
// Test division of a complex number by a complex number.
// This assumes that RealType is a floating-point type.
complex_type z6 (Kokkos::real (z5) / 2.0,
Kokkos::imag (z5) / 2.0);
complex_type z7 = z3 / z4;
ASSERT_TRUE( z7 == z6 );
}
// Runs the host-side (non-kernel) checks of Kokkos::complex<RealType>.
// NOTE(review): testMinus<RealType>() is defined in this file but is not
// invoked here -- confirm whether that omission is intentional.
template <typename RealType>
void testOutsideKernel () {
  testComplexConstructors<RealType> ();
  testPlus<RealType> ();
  testTimes<RealType> ();
  testDivide<RealType> ();
}
template<typename RealType, typename Device>
void testCreateView () {
typedef Kokkos::complex<RealType> complex_type;
Kokkos::View<complex_type*, Device> x ("x", 10);
ASSERT_TRUE( x.dimension_0 () == 10 );
// Test that View assignment works.
Kokkos::View<complex_type*, Device> x_nonconst = x;
Kokkos::View<const complex_type*, Device> x_const = x;
}
// parallel_for functor that writes one fixed complex value into every entry
// of a 1-D View.
template<typename RealType, typename Device>
class Fill {
public:
  typedef typename Device::execution_space execution_space;
  typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type;
  typedef typename view_type::size_type size_type;

  // x is the view to overwrite, val the value stored in each entry.
  Fill (const view_type& x, const Kokkos::complex<RealType>& val) :
    x_ (x), val_ (val)
  {}

  // Stores the fill value at index i.
  KOKKOS_INLINE_FUNCTION
  void operator () (const size_type i) const {
    x_(i) = val_;
  }

private:
  view_type x_;
  const Kokkos::complex<RealType> val_;
};
// parallel_reduce functor that adds up all entries of a 1-D View of
// Kokkos::complex<RealType>.
template<typename RealType, typename Device>
class Sum {
public:
  typedef typename Device::execution_space execution_space;
  typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type;
  typedef typename view_type::size_type size_type;
  typedef Kokkos::complex<RealType> value_type;

  // x is the (read-only) view whose entries are summed.
  Sum (const view_type& x) : x_ (x) {}

  // Adds entry i to the running partial sum.
  KOKKOS_INLINE_FUNCTION
  void operator () (const size_type i, Kokkos::complex<RealType>& sum) const {
    sum += x_(i);
  }

private:
  view_type x_;
};
template<typename RealType, typename Device>
void testInsideKernel () {
typedef Kokkos::complex<RealType> complex_type;
typedef Kokkos::View<complex_type*, Device> view_type;
typedef typename view_type::size_type size_type;
const size_type N = 1000;
view_type x ("x", N);
ASSERT_TRUE( x.dimension_0 () == N );
// Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) {
// result += x[i];
// });
Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0)));
complex_type sum;
Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum);
ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 );
}
} // namespace Impl
// Entry point of the complex-number tests for one Device: runs the host-side
// checks, the View creation checks and the in-kernel checks, each for float
// and for double.
template <typename Device>
void testComplex ()
{
  // Host-side arithmetic and construction.
  Impl::testOutsideKernel<float> ();
  Impl::testOutsideKernel<double> ();

  // Views holding complex values.
  Impl::testCreateView<float, Device> ();
  Impl::testCreateView<double, Device> ();

  // Complex values used inside parallel kernels.
  Impl::testInsideKernel<float, Device> ();
  Impl::testInsideKernel<double, Device> ();
}
} // namespace Test
#endif // KOKKOS_TEST_COMPLEX_HPP

View File

@ -1,206 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <iomanip>
#include <stdint.h>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_CUDA
namespace Test {
class cuda : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase()
{
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};
// Runs both StaticCrsGraph test drivers on the CUDA execution space.
TEST_F( cuda , staticcrsgraph )
{
  TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
  TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >();
}
// Thin non-template wrappers that bind the generic container tests to
// Kokkos::Cuda so the test macros below can call them by plain name.

// UnorderedMap insertion with the final test_insert argument set to true.
void cuda_test_insert_close( uint32_t num_nodes
                           , uint32_t num_inserts
                           , uint32_t num_duplicates
                           )
{
  test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, true);
}

// UnorderedMap insertion with the final test_insert argument set to false.
void cuda_test_insert_far( uint32_t num_nodes
                         , uint32_t num_inserts
                         , uint32_t num_duplicates
                         )
{
  test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, false);
}

// UnorderedMap failed-insert test.
void cuda_test_failed_insert( uint32_t num_nodes )
{
  test_failed_insert< Kokkos::Cuda >( num_nodes );
}

// UnorderedMap deep_copy test.
void cuda_test_deep_copy( uint32_t num_nodes )
{
  test_deep_copy< Kokkos::Cuda >( num_nodes );
}

// Kokkos::vector test with int elements.
void cuda_test_vector_combinations(unsigned int size)
{
  test_vector_combinations<int,Kokkos::Cuda>(size);
}

// DualView test with int elements.
void cuda_test_dualview_combinations(unsigned int size)
{
  test_dualview_combinations<int,Kokkos::Cuda>(size);
}

// SegmentedView test with double elements.
void cuda_test_segmented_view(unsigned int size)
{
  test_segmented_view<double,Kokkos::Cuda>(size);
}

// Bitset test.
void cuda_test_bitset()
{
  test_bitset<Kokkos::Cuda>();
}
/*TEST_F( cuda, bitset )
{
cuda_test_bitset();
}*/
// Helper macros that stamp out gtest cases on the `cuda` fixture.  The macro
// arguments are pasted into the test name, so every distinct argument
// combination produces a distinct test.

// Defines a test that calls cuda_test_insert_<name>(num_nodes, num_inserts,
// num_duplicates) `repeat` times.
#define CUDA_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat ) \
TEST_F( cuda, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_insert_##name(num_nodes,num_inserts,num_duplicates); \
}
// Defines a test that calls cuda_test_failed_insert(num_nodes) `repeat` times.
#define CUDA_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_failed_insert(num_nodes); \
}
// NOTE(review): this macro expands to a call to
// cuda_test_assignment_operators, which is not among the wrappers defined
// above.  It is never invoked below, so nothing breaks today; supply the
// wrapper before using it.
#define CUDA_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_assignment_operators(num_nodes); \
}
// Defines a test that calls cuda_test_deep_copy(num_nodes) `repeat` times.
// Unlike the other macros there is no '_' between the name prefix and
// num_nodes in the generated test name.
#define CUDA_DEEP_COPY( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_deep_copy(num_nodes); \
}
// Defines a test that runs cuda_test_vector_combinations(size) once.
#define CUDA_VECTOR_COMBINE_TEST( size ) \
TEST_F( cuda, vector_combination##size##x) { \
cuda_test_vector_combinations(size); \
}
// Defines a test that runs cuda_test_dualview_combinations(size) once.
#define CUDA_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( cuda, dualview_combination##size##x) { \
cuda_test_dualview_combinations(size); \
}
// Defines a test that runs cuda_test_segmented_view(size) once.
#define CUDA_SEGMENTEDVIEW_TEST( size ) \
TEST_F( cuda, segmentedview_##size##x) { \
cuda_test_segmented_view(size); \
}
// Instantiate the CUDA test cases.  Each line expands to one TEST_F on the
// `cuda` fixture; the insert tests repeat 500 times and the failed-insert
// test 1000 times.
CUDA_DUALVIEW_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 3057 )
CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)
CUDA_DEEP_COPY( 10000, 1 )
CUDA_FAILED_INSERT_TEST( 10000, 1000 )
CUDA_SEGMENTEDVIEW_TEST( 200 )
// The helper macros are only needed for the instantiations above.
#undef CUDA_INSERT_TEST
#undef CUDA_FAILED_INSERT_TEST
#undef CUDA_ASSIGNEMENT_TEST
#undef CUDA_DEEP_COPY
#undef CUDA_VECTOR_COMBINE_TEST
#undef CUDA_DUALVIEW_COMBINE_TEST
#undef CUDA_SEGMENTEDVIEW_TEST
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */

View File

@ -1,121 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_DUALVIEW_HPP
#define KOKKOS_TEST_DUALVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
// Exercises the modify()/sync() bookkeeping of a rank-2 DualView, including
// a subview that is modified on the device side.  The constructor runs the
// scenario and stores the outcome in `result`.
template <typename Scalar, class Device>
struct test_dualview_combinations
{
  typedef test_dualview_combinations<Scalar,Device> self_type;
  typedef Scalar scalar_type;
  typedef Device execution_space;

  Scalar reference;  // not written or read anywhere in this struct
  Scalar result;     // host-side sum minus the expected total (see run_me)

  // Builds an n x m DualView (at least 10 x 3), updates it alternately on
  // the device and host sides and returns the host-side sum of all entries
  // minus the total those updates should produce.
  template <typename ViewType>
  Scalar run_me(unsigned int n,unsigned int m){
    typedef typename ViewType::execution_space device_side;
    typedef typename ViewType::host_mirror_space host_side;
    typedef std::pair<unsigned int, unsigned int> index_range;

    // The hard-coded indices below need at least 10 rows and 3 columns.
    if(n<10) n = 10;
    if(m<3) m = 3;
    ViewType a("A",n,m);

    // Device side: every entry becomes 1; then bring the host copy up to date.
    Kokkos::deep_copy( a.d_view , 1 );
    a.template modify<device_side>();
    a.template sync<host_side>();

    // Host side: overwrite three entries (adds 2 + 3 + 4 to the sum).
    a.h_view(5,1) = 3;
    a.h_view(6,1) = 4;
    a.h_view(7,2) = 5;
    a.template modify<host_side>();

    // Subview of rows [6,9) and column [0,1); its three entries are set to 2
    // on the device side (adds 3 to the sum).
    ViewType b = Kokkos::subview(a,index_range(6,9),index_range(0,1));
    a.template sync<device_side>();
    b.template modify<device_side>();
    Kokkos::deep_copy( b.d_view , 2 );

    // Back on the host, add up everything that is visible there.
    a.template sync<host_side>();
    Scalar count = 0;
    for(unsigned int i = 0; i<a.d_view.dimension_0(); i++) {
      for(unsigned int j = 0; j<a.d_view.dimension_1(); j++) {
        count += a.h_view(i,j);
      }
    }

    // Expected total: n*m ones plus the 2+4+3*2 added by the updates above.
    return count - a.d_view.dimension_0()*a.d_view.dimension_1()-2-4-3*2;
  }

  test_dualview_combinations(unsigned int size)
  {
    result = run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >(size,3);
  }
};
} // namespace Impl
template <typename Scalar, typename Device>
void test_dualview_combinations(unsigned int size)
{
Impl::test_dualview_combinations<Scalar,Device> test(size);
ASSERT_EQ( test.result,0);
}
} // namespace Test
#endif //KOKKOS_TEST_DUALVIEW_HPP

View File

@ -1,162 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
//----------------------------------------------------------------------------
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
#include <TestComplex.hpp>
#include <iomanip>
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
// gtest fixture shared by all OpenMP container tests.  Kokkos::OpenMP is
// initialized once per test case and finalized afterwards.
class openmp : public ::testing::Test {
protected:
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5) << std::scientific;

    // Use NUMA regions x cores per region when hwloc is available,
    // otherwise fall back to 4 threads.
    const unsigned threads_count =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count() *
          Kokkos::hwloc::get_available_cores_per_numa()
        : 4 ;

    Kokkos::OpenMP::initialize( threads_count );
  }

  static void TearDownTestCase()
  {
    Kokkos::OpenMP::finalize();
  }
};
// Runs the Kokkos::complex tests on the OpenMP execution space.
TEST_F( openmp, complex )
{
  testComplex< Kokkos::OpenMP >();
}
// Runs the Bitset tests on the OpenMP execution space.
TEST_F( openmp, bitset )
{
  test_bitset< Kokkos::OpenMP >();
}
// Runs both StaticCrsGraph test drivers on the OpenMP execution space.
TEST_F( openmp , staticcrsgraph )
{
  TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >();
  TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
}
// Helper macros that stamp out gtest cases on the `openmp` fixture.  The
// macro arguments are pasted into the test name, so every distinct argument
// combination produces a distinct test.

// Defines a test that calls test_insert<Kokkos::OpenMP>(num_nodes,
// num_inserts, num_duplicates, near) `repeat` times.  `name` only appears in
// the generated test name.
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( openmp, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::OpenMP>(num_nodes,num_inserts,num_duplicates, near); \
}
// Defines a test that calls test_failed_insert<Kokkos::OpenMP>(num_nodes)
// `repeat` times.
#define OPENMP_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::OpenMP>(num_nodes); \
}
// NOTE(review): this macro is never invoked below.  It expands to a call to
// test_assignement_operators, which is not defined in this file -- confirm
// that the included test headers provide it before using the macro.
#define OPENMP_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::OpenMP>(num_nodes); \
}
// Defines a test that calls test_deep_copy<Kokkos::OpenMP>(num_nodes)
// `repeat` times.  Unlike the other macros there is no '_' between the name
// prefix and num_nodes in the generated test name.
#define OPENMP_DEEP_COPY( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::OpenMP>(num_nodes); \
}
// Defines a test that runs test_vector_combinations<int,Kokkos::OpenMP>(size).
#define OPENMP_VECTOR_COMBINE_TEST( size ) \
TEST_F( openmp, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::OpenMP>(size); \
}
// Defines a test that runs test_dualview_combinations<int,Kokkos::OpenMP>(size).
#define OPENMP_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( openmp, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
}
// Defines a test that runs test_segmented_view<double,Kokkos::OpenMP>(size).
#define OPENMP_SEGMENTEDVIEW_TEST( size ) \
TEST_F( openmp, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::OpenMP>(size); \
}
// Instantiate the OpenMP test cases.  Each line expands to one TEST_F on the
// `openmp` fixture; the insert tests repeat 500 times and the failed-insert
// test 1000 times.
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
OPENMP_DEEP_COPY( 10000, 1 )
OPENMP_VECTOR_COMBINE_TEST( 10 )
OPENMP_VECTOR_COMBINE_TEST( 3057 )
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
OPENMP_SEGMENTEDVIEW_TEST( 10000 )
// The helper macros are only needed for the instantiations above.
#undef OPENMP_INSERT_TEST
#undef OPENMP_FAILED_INSERT_TEST
#undef OPENMP_ASSIGNEMENT_TEST
#undef OPENMP_DEEP_COPY
#undef OPENMP_VECTOR_COMBINE_TEST
#undef OPENMP_DUALVIEW_COMBINE_TEST
#undef OPENMP_SEGMENTEDVIEW_TEST
#endif
} // namespace Test

View File

@ -1,708 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP
#define KOKKOS_TEST_SEGMENTEDVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <Kokkos_Core.hpp>
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
#include <Kokkos_SegmentedView.hpp>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
// Team-policy reduction functor used to grow and fill a segmented view.
// Only declared here; one partial specialization per view rank follows, with
// Rank defaulting to ViewType::Rank.
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
struct GrowTest;
// Rank-1 specialization: each team calls grow() with the end of its index
// range, then every thread adds its global index to the reduction value and
// stores that index in the view when it lies inside the current extent.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 1> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global index handled by this thread.
    const unsigned int row = team_idx + team_member.team_rank();
    value += row;

    // Both extent accessors are consulted before writing.
    const bool in_range = (a.dimension_0() > row) && (a.dimension(0) > row);
    if (in_range) {
      a(row) = row;
    }
  }
};
// Rank-2 specialization: each team calls grow() with the end of its index
// range; every thread adds the values for a fixed 7-column row to the
// reduction value and writes row + 13*k into its row of the view when the
// row lies inside the current extent.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 2> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global row handled by this thread.
    const unsigned int row = team_idx + team_member.team_rank();

    // The reduction uses the fixed extent 7, independent of the view.
    for (index_type k = 0; k < 7; k++) {
      value += row + 13*k;
    }

    // Both extent accessors are consulted before writing.
    const bool in_range = (a.dimension_0() > row) && (a.dimension(0) > row);
    if (in_range) {
      for (index_type k = 0; k < a.dimension_1(); k++) {
        a(row,k) = row + 13*k;
      }
    }
  }
};
// Rank-3 specialization: each team calls grow() with the end of its index
// range; every thread adds the values for a fixed 7 x 3 slab to the
// reduction value and writes row + 13*k + 3*l into its slab of the view when
// the row lies inside the current extent.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 3> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global row handled by this thread.
    const unsigned int row = team_idx + team_member.team_rank();

    // The reduction uses the fixed extents 7 x 3, independent of the view.
    for (index_type k = 0; k < 7; k++) {
      for (index_type l = 0; l < 3; l++) {
        value += row + 13*k + 3*l;
      }
    }

    // Both extent accessors are consulted before writing.
    const bool in_range = (a.dimension_0() > row) && (a.dimension(0) > row);
    if (in_range) {
      for (index_type k = 0; k < a.dimension_1(); k++) {
        for (index_type l = 0; l < a.dimension_2(); l++) {
          a(row,k,l) = row + 13*k + 3*l;
        }
      }
    }
  }
};
// Rank-4 specialization: each team calls grow() with the end of its index
// range; every thread adds the values for a fixed 7 x 3 x 2 slab to the
// reduction value and writes row + 13*k + 3*l + 7*m into its slab of the
// view when the row lies inside the current extent.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 4> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global row handled by this thread.
    const unsigned int row = team_idx + team_member.team_rank();

    // The reduction uses the fixed extents 7 x 3 x 2, independent of the view.
    for (index_type k = 0; k < 7; k++) {
      for (index_type l = 0; l < 3; l++) {
        for (index_type m = 0; m < 2; m++) {
          value += row + 13*k + 3*l + 7*m;
        }
      }
    }

    // Both extent accessors are consulted before writing.
    const bool in_range = (a.dimension_0() > row) && (a.dimension(0) > row);
    if (in_range) {
      for (index_type k = 0; k < a.dimension_1(); k++) {
        for (index_type l = 0; l < a.dimension_2(); l++) {
          for (index_type m = 0; m < a.dimension_3(); m++) {
            a(row,k,l,m) = row + 13*k + 3*l + 7*m;
          }
        }
      }
    }
  }
};
// Rank-5 specialization: each team calls grow() with the end of its index
// range; every thread adds the values for a fixed 7 x 3 x 2 x 3 slab to the
// reduction value and writes row + 13*k + 3*l + 7*m + 5*n into its slab of
// the view when the row lies inside the current extent.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 5> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global row handled by this thread.
    const unsigned int row = team_idx + team_member.team_rank();

    // The reduction uses the fixed extents 7 x 3 x 2 x 3, independent of
    // the view.
    for (index_type k = 0; k < 7; k++) {
      for (index_type l = 0; l < 3; l++) {
        for (index_type m = 0; m < 2; m++) {
          for (index_type n = 0; n < 3; n++) {
            value += row + 13*k + 3*l + 7*m + 5*n;
          }
        }
      }
    }

    // Both extent accessors are consulted before writing.
    const bool in_range = (a.dimension_0() > row) && (a.dimension(0) > row);
    if (in_range) {
      for (index_type k = 0; k < a.dimension_1(); k++) {
        for (index_type l = 0; l < a.dimension_2(); l++) {
          for (index_type m = 0; m < a.dimension_3(); m++) {
            for (index_type n = 0; n < a.dimension_4(); n++) {
              a(row,k,l,m,n) = row + 13*k + 3*l + 7*m + 5*n;
            }
          }
        }
      }
    }
  }
};
// Rank-6 specialization: each team calls grow() with the end of its index
// range; every thread adds the values for a fixed 7 x 3 x 2 x 3 x 2 slab to
// the reduction value and writes row + 13*k + 3*l + 7*m + 5*n + 2*o into its
// slab of the view when the row lies inside the current extent.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 6> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global row handled by this thread.
    const unsigned int row = team_idx + team_member.team_rank();

    // The reduction uses the fixed extents 7 x 3 x 2 x 3 x 2, independent
    // of the view.
    for (index_type k = 0; k < 7; k++) {
      for (index_type l = 0; l < 3; l++) {
        for (index_type m = 0; m < 2; m++) {
          for (index_type n = 0; n < 3; n++) {
            for (index_type o = 0; o < 2; o++) {
              value += row + 13*k + 3*l + 7*m + 5*n + 2*o ;
            }
          }
        }
      }
    }

    // Both extent accessors are consulted before writing.
    const bool in_range = (a.dimension_0() > row) && (a.dimension(0) > row);
    if (in_range) {
      for (index_type k = 0; k < a.dimension_1(); k++) {
        for (index_type l = 0; l < a.dimension_2(); l++) {
          for (index_type m = 0; m < a.dimension_3(); m++) {
            for (index_type n = 0; n < a.dimension_4(); n++) {
              for (index_type o = 0; o < a.dimension_5(); o++) {
                a(row,k,l,m,n,o) = row + 13*k + 3*l + 7*m + 5*n + 2*o ;
              }
            }
          }
        }
      }
    }
  }
};
// Rank-7 specialization of the GrowTest team functor.
//
// Each team asks the view to grow so that it can hold the rows of this team,
// adds the values it expects to store for its row to the reduction result,
// and then writes those values into the row if the row exists.
// NOTE(review): the reference loops use hard-coded extents (7,3,2,3,2,4);
// presumably these match the extents the driver allocates -- confirm.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 7> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());
    const unsigned int row = team_idx + team_member.team_rank();

    // Accumulate the reference value for every cell of this row.
    for (index_type i1 = 0; i1 < 7; ++i1)
      for (index_type i2 = 0; i2 < 3; ++i2)
        for (index_type i3 = 0; i3 < 2; ++i3)
          for (index_type i4 = 0; i4 < 3; ++i4)
            for (index_type i5 = 0; i5 < 2; ++i5)
              for (index_type i6 = 0; i6 < 4; ++i6)
                value += row + 13*i1 + 3*i2 + 7*i3 + 5*i4 + 2*i5 + 15*i6;

    // Both extent accessors must report that the row exists.
    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        for (index_type i3 = 0; i3 < a.dimension_3(); ++i3)
          for (index_type i4 = 0; i4 < a.dimension_4(); ++i4)
            for (index_type i5 = 0; i5 < a.dimension_5(); ++i5)
              for (index_type i6 = 0; i6 < a.dimension_6(); ++i6)
                a(row,i1,i2,i3,i4,i5,i6) =
                  row + 13*i1 + 3*i2 + 7*i3 + 5*i4 + 2*i5 + 15*i6;
  }
};
// Rank-8 specialization of the GrowTest team functor.
//
// Each team asks the view to grow so that it can hold the rows of this team,
// adds the values it expects to store for its row to the reduction result,
// and then writes those values into the row if the row exists.
// NOTE(review): the reference loops use hard-coded extents (7,3,2,3,2,4,3);
// presumably these match the extents the driver allocates -- confirm.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 8> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());
    const unsigned int row = team_idx + team_member.team_rank();

    // Accumulate the reference value for every cell of this row.
    for (index_type i1 = 0; i1 < 7; ++i1)
      for (index_type i2 = 0; i2 < 3; ++i2)
        for (index_type i3 = 0; i3 < 2; ++i3)
          for (index_type i4 = 0; i4 < 3; ++i4)
            for (index_type i5 = 0; i5 < 2; ++i5)
              for (index_type i6 = 0; i6 < 4; ++i6)
                for (index_type i7 = 0; i7 < 3; ++i7)
                  value += row + 13*i1 + 3*i2 + 7*i3 + 5*i4 + 2*i5 + 15*i6 + 17*i7;

    // Both extent accessors must report that the row exists.
    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        for (index_type i3 = 0; i3 < a.dimension_3(); ++i3)
          for (index_type i4 = 0; i4 < a.dimension_4(); ++i4)
            for (index_type i5 = 0; i5 < a.dimension_5(); ++i5)
              for (index_type i6 = 0; i6 < a.dimension_6(); ++i6)
                for (index_type i7 = 0; i7 < a.dimension_7(); ++i7)
                  a(row,i1,i2,i3,i4,i5,i6,i7) =
                    row + 13*i1 + 3*i2 + 7*i3 + 5*i4 + 2*i5 + 15*i6 + 17*i7;
  }
};
// Primary template of the verification functor. It is only declared here;
// the partial specializations that follow provide one definition per rank,
// selected by default from ViewType::Rank.
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
struct VerifyTest;
// Rank-1 specialization of the VerifyTest team functor: every team member
// adds the entry of its own row to the reduction result, provided the row
// lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 1> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    value += a(row);
  }
};
// Rank-2 specialization of the VerifyTest team functor: every team member
// sums all entries of its own row into the reduction result, provided the
// row lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 2> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      value += a(row,i1);
  }
};
// Rank-3 specialization of the VerifyTest team functor: every team member
// sums all entries of its own row into the reduction result, provided the
// row lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 3> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        value += a(row,i1,i2);
  }
};
// Rank-4 specialization of the VerifyTest team functor: every team member
// sums all entries of its own row into the reduction result, provided the
// row lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 4> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        for (index_type i3 = 0; i3 < a.dimension_3(); ++i3)
          value += a(row,i1,i2,i3);
  }
};
// Rank-5 specialization of the VerifyTest team functor: every team member
// sums all entries of its own row into the reduction result, provided the
// row lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 5> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        for (index_type i3 = 0; i3 < a.dimension_3(); ++i3)
          for (index_type i4 = 0; i4 < a.dimension_4(); ++i4)
            value += a(row,i1,i2,i3,i4);
  }
};
// Rank-6 specialization of the VerifyTest team functor: every team member
// sums all entries of its own row into the reduction result, provided the
// row lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 6> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        for (index_type i3 = 0; i3 < a.dimension_3(); ++i3)
          for (index_type i4 = 0; i4 < a.dimension_4(); ++i4)
            for (index_type i5 = 0; i5 < a.dimension_5(); ++i5)
              value += a(row,i1,i2,i3,i4,i5);
  }
};
// Rank-7 specialization of the VerifyTest team functor: every team member
// sums all entries of its own row into the reduction result, provided the
// row lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 7> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        for (index_type i3 = 0; i3 < a.dimension_3(); ++i3)
          for (index_type i4 = 0; i4 < a.dimension_4(); ++i4)
            for (index_type i5 = 0; i5 < a.dimension_5(); ++i5)
              for (index_type i6 = 0; i6 < a.dimension_6(); ++i6)
                value += a(row,i1,i2,i3,i4,i5,i6);
  }
};
// Rank-8 specialization of the VerifyTest team functor: every team member
// sums all entries of its own row into the reduction result, provided the
// row lies inside the view according to both extent accessors.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 8> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type index_type;

    // First row handled by this team; every team member owns one row.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    if (a.dimension_0() <= row) return;
    if (a.dimension(0) <= row) return;

    for (index_type i1 = 0; i1 < a.dimension_1(); ++i1)
      for (index_type i2 = 0; i2 < a.dimension_2(); ++i2)
        for (index_type i3 = 0; i3 < a.dimension_3(); ++i3)
          for (index_type i4 = 0; i4 < a.dimension_4(); ++i4)
            for (index_type i5 = 0; i5 < a.dimension_5(); ++i5)
              for (index_type i6 = 0; i6 < a.dimension_6(); ++i6)
                for (index_type i7 = 0; i7 < a.dimension_7(); ++i7)
                  value += a(row,i1,i2,i3,i4,i5,i6,i7);
  }
};
// Driver that builds a SegmentedView of the requested rank, runs GrowTest and
// then VerifyTest over it, and stores the two reduction results.
//
//   reference - sum produced by GrowTest (the values it expects to store)
//   result    - sum produced by VerifyTest (the values read back)
//
// Callers compare `reference` and `result` after construction. A rank outside
// 1..8 leaves both at zero.
template <typename Scalar, class ExecutionSpace>
struct test_segmented_view
{
  typedef test_segmented_view<Scalar,ExecutionSpace> self_type;
  typedef Scalar scalar_type;
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;

  double result;
  double reference;

  // Runs the grow pass and the verify pass over `a` with the same league and
  // team size, so both reductions visit the same set of rows.
  // NOTE: nteams uses truncating division, so when max_length is not a
  // multiple of team_size the trailing rows are visited by neither pass.
  template <class ViewType>
  void run_me(ViewType a, int max_length){
    const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) );
    const int nteams = max_length/team_size;

    reference = 0;
    result = 0;

    Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference);
    Kokkos::fence();
    Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result);
    Kokkos::fence();
  }

  // size: value passed to the view constructor after the segment length
  //       (128), and also the number of rows the passes iterate over.
  // rank: rank of the view to exercise (1..8).
  test_segmented_view(unsigned int size,int rank)
  {
    reference = 0;
    result = 0;

    // Extents of the trailing dimensions; static extents in the view types
    // below repeat the same numbers.
    const int dim_1 = 7;
    const int dim_2 = 3;
    const int dim_3 = 2;
    const int dim_4 = 3;
    const int dim_5 = 2;
    const int dim_6 = 4;
    //const int dim_7 = 3;

    if(rank==1) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view;
      run_me< rank1_view >(rank1_view("Rank1",128,size), size);
    }
    if(rank==2) {
      typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view;
      run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size);
    }
    if(rank==3) {
      // Scalar*[7][3] has three dimensions. The previous type,
      // Scalar*[7][3][2], had four, so this branch silently repeated the
      // rank-4 path and the rank-3 functors were never instantiated here.
      typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3],Kokkos::LayoutRight,ExecutionSpace> rank3_view;
      run_me< rank3_view >(rank3_view("Rank3",128,size), size);
    }
    if(rank==4) {
      typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view;
      run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size);
    }
    if(rank==5) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view;
      run_me< rank5_view >(rank5_view("Rank5",128,size), size);
    }
    if(rank==6) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view;
      run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size);
    }
    if(rank==7) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view;
      run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size);
    }
    if(rank==8) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view;
      run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size);
    }
  }
};
} // namespace Impl
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int size)
{
{
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type;
view_type a("A",128,size,7,3,2,3);
double reference;
Impl::GrowTest<view_type,ExecutionSpace> f(a);
const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f );
const int nteams = (size+team_size-1)/team_size;
Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference);
size_t real_size = ((size+127)/128)*128;
ASSERT_EQ(real_size,a.dimension_0());
ASSERT_EQ(7,a.dimension_1());
ASSERT_EQ(3,a.dimension_2());
ASSERT_EQ(2,a.dimension_3());
ASSERT_EQ(3,a.dimension_4());
ASSERT_EQ(2,a.dimension_5());
ASSERT_EQ(4,a.dimension_6());
ASSERT_EQ(3,a.dimension_7());
ASSERT_EQ(real_size,a.dimension(0));
ASSERT_EQ(7,a.dimension(1));
ASSERT_EQ(3,a.dimension(2));
ASSERT_EQ(2,a.dimension(3));
ASSERT_EQ(3,a.dimension(4));
ASSERT_EQ(2,a.dimension(5));
ASSERT_EQ(4,a.dimension(6));
ASSERT_EQ(3,a.dimension(7));
ASSERT_EQ(8,a.Rank);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8);
ASSERT_EQ(test.reference,test.result);
}
}
} // namespace Test
#else
// Fallback used when the real SegmentedView test is compiled out: accepts the
// same call and does nothing.
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int /* size */)
{
}
#endif
#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */

View File

@ -1,158 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if ! defined(KOKKOS_HAVE_SERIAL)
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
#else
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
#include <TestComplex.hpp>
#include <iomanip>
namespace Test {
// gtest fixture shared by all Kokkos::Serial container tests in this file.
class serial : public ::testing::Test {
protected:
  // Configures std::cout number formatting and initializes Kokkos::Serial.
  static void SetUpTestCase ()
  {
    std::cout << std::setprecision(5);
    std::cout << std::scientific;
    Kokkos::Serial::initialize ();
  }

  // Finalizes Kokkos::Serial.
  static void TearDownTestCase ()
  {
    Kokkos::Serial::finalize ();
  }
};
// Runs both StaticCrsGraph tests (run_test_graph and run_test_graph2) with
// Kokkos::Serial as the space.
TEST_F( serial , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >();
}
// Runs testComplex (from TestComplex.hpp) with Kokkos::Serial.
TEST_F( serial, complex )
{
testComplex<Kokkos::Serial> ();
}
// Runs test_bitset (from TestBitset.hpp) with Kokkos::Serial.
TEST_F( serial, bitset )
{
test_bitset<Kokkos::Serial> ();
}
// Test-generating macros for the serial fixture. Each one expands to a TEST_F
// whose name encodes the macro arguments, so every instantiation yields a
// distinctly named test.

// Calls test_insert<Kokkos::Serial> `repeat` times with the given node,
// insert and duplicate counts; `near` is forwarded as the last argument.
#define SERIAL_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( serial, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::Serial> (num_nodes, num_inserts, num_duplicates, near); \
}
// Calls test_failed_insert<Kokkos::Serial> `repeat` times.
#define SERIAL_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::Serial> (num_nodes); \
}
// Calls test_assignement_operators<Kokkos::Serial> `repeat` times.
// NOTE(review): this macro is defined but not instantiated below.
#define SERIAL_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::Serial> (num_nodes); \
}
// Calls test_deep_copy<Kokkos::Serial> `repeat` times.
#define SERIAL_DEEP_COPY( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::Serial> (num_nodes); \
}
// Calls test_vector_combinations<int,Kokkos::Serial> once with `size`.
#define SERIAL_VECTOR_COMBINE_TEST( size ) \
TEST_F( serial, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::Serial>(size); \
}
// Calls test_dualview_combinations<int,Kokkos::Serial> once with `size`.
#define SERIAL_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( serial, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::Serial>(size); \
}
// Calls test_segmented_view<double,Kokkos::Serial> once with `size`.
#define SERIAL_SEGMENTEDVIEW_TEST( size ) \
TEST_F( serial, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::Serial>(size); \
}
// Concrete serial tests generated from the macros above.
SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
SERIAL_DEEP_COPY( 10000, 1 )
SERIAL_VECTOR_COMBINE_TEST( 10 )
SERIAL_VECTOR_COMBINE_TEST( 3057 )
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
SERIAL_SEGMENTEDVIEW_TEST( 10000 )
// The helper macros are only needed for the instantiations above; remove
// them so they do not leak out of this file.
#undef SERIAL_INSERT_TEST
#undef SERIAL_FAILED_INSERT_TEST
#undef SERIAL_ASSIGNEMENT_TEST
#undef SERIAL_DEEP_COPY
#undef SERIAL_VECTOR_COMBINE_TEST
#undef SERIAL_DUALVIEW_COMBINE_TEST
#undef SERIAL_SEGMENTEDVIEW_TEST
} // namespace test
#endif // KOKKOS_HAVE_SERIAL

View File

@ -1,149 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <vector>
#include <Kokkos_StaticCrsGraph.hpp>
/*--------------------------------------------------------------------------*/
namespace TestStaticCrsGraph {
template< class Space >
void run_test_graph()
{
typedef Kokkos::StaticCrsGraph< unsigned , Space > dView ;
typedef typename dView::HostMirror hView ;
const unsigned LENGTH = 1000 ;
dView dx ;
hView hx ;
std::vector< std::vector< int > > graph( LENGTH );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
graph[i].reserve(8);
for ( size_t j = 0 ; j < 8 ; ++j ) {
graph[i].push_back( i + j * 3 );
}
}
dx = Kokkos::create_staticcrsgraph<dView>( "dx" , graph );
hx = Kokkos::create_mirror( dx );
ASSERT_EQ( hx.row_map.dimension_0() - 1 , LENGTH );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t begin = hx.row_map[i];
const size_t n = hx.row_map[i+1] - begin ;
ASSERT_EQ( n , graph[i].size() );
for ( size_t j = 0 ; j < n ; ++j ) {
ASSERT_EQ( (int) hx.entries( j + begin ) , graph[i][j] );
}
}
}
template< class Space >
void run_test_graph2()
{
typedef Kokkos::StaticCrsGraph< unsigned[3] , Space > dView ;
typedef typename dView::HostMirror hView ;
const unsigned LENGTH = 10 ;
std::vector< size_t > sizes( LENGTH );
size_t total_length = 0 ;
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
total_length += ( sizes[i] = 6 + i % 4 );
}
dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
hView hx = Kokkos::create_mirror( dx );
hView mx = Kokkos::create_mirror( dx );
ASSERT_EQ( (size_t) dx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) hx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) dx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) hx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) mx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) dx.entries.dimension_1() , (size_t) 3 );
ASSERT_EQ( (size_t) hx.entries.dimension_1() , (size_t) 3 );
ASSERT_EQ( (size_t) mx.entries.dimension_1() , (size_t) 3 );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t entry_begin = hx.row_map[i];
const size_t entry_end = hx.row_map[i+1];
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
hx.entries(j,0) = j + 1 ;
hx.entries(j,1) = j + 2 ;
hx.entries(j,2) = j + 3 ;
}
}
Kokkos::deep_copy( dx.entries , hx.entries );
Kokkos::deep_copy( mx.entries , dx.entries );
ASSERT_EQ( mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t entry_begin = mx.row_map[i];
const size_t entry_end = mx.row_map[i+1];
ASSERT_EQ( ( entry_end - entry_begin ) , sizes[i] );
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
ASSERT_EQ( (size_t) mx.entries( j , 0 ) , ( j + 1 ) );
ASSERT_EQ( (size_t) mx.entries( j , 1 ) , ( j + 2 ) );
ASSERT_EQ( (size_t) mx.entries( j , 2 ) , ( j + 3 ) );
}
}
}
} /* namespace TestStaticCrsGraph */

View File

@ -1,168 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
namespace Test {
// gtest fixture shared by all Kokkos::Threads container tests in this file.
class threads : public ::testing::Test {
protected:
  // Configures std::cout number formatting, chooses a thread count, prints
  // it, and initializes Kokkos::Threads with it.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5);
    std::cout << std::scientific;

    // Fallback when hwloc is not available.
    unsigned num_threads = 4;

    if (Kokkos::hwloc::available()) {
      // One thread per available core; the threads-per-core factor is
      // deliberately left out:
      //   * Kokkos::hwloc::get_available_threads_per_core()
      num_threads = Kokkos::hwloc::get_available_numa_count()
                  * Kokkos::hwloc::get_available_cores_per_numa();
    }

    std::cout << "Threads: " << num_threads << std::endl;

    Kokkos::Threads::initialize( num_threads );
  }

  // Finalizes Kokkos::Threads.
  static void TearDownTestCase()
  {
    Kokkos::Threads::finalize();
  }
};
// Runs both StaticCrsGraph tests (run_test_graph and run_test_graph2) with
// Kokkos::Threads as the space.
TEST_F( threads , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Threads >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Threads >();
}
/*TEST_F( threads, bitset )
{
test_bitset<Kokkos::Threads>();
}*/
// Test-generating macros for the threads fixture. Each one expands to a
// TEST_F whose name encodes the macro arguments, so every instantiation
// yields a distinctly named test.

// Calls test_insert<Kokkos::Threads> `repeat` times with the given node,
// insert and duplicate counts; `near` is forwarded as the last argument.
#define THREADS_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( threads, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::Threads>(num_nodes,num_inserts,num_duplicates, near); \
}
// Calls test_failed_insert<Kokkos::Threads> `repeat` times.
#define THREADS_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( threads, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::Threads>(num_nodes); \
}
// Calls test_assignement_operators<Kokkos::Threads> `repeat` times.
// NOTE(review): this macro is defined but not instantiated below.
#define THREADS_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( threads, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::Threads>(num_nodes); \
}
// Calls test_deep_copy<Kokkos::Threads> `repeat` times.
#define THREADS_DEEP_COPY( num_nodes, repeat ) \
TEST_F( threads, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::Threads>(num_nodes); \
}
// Calls test_vector_combinations<int,Kokkos::Threads> once with `size`.
#define THREADS_VECTOR_COMBINE_TEST( size ) \
TEST_F( threads, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::Threads>(size); \
}
// Calls test_dualview_combinations<int,Kokkos::Threads> once with `size`.
#define THREADS_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( threads, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::Threads>(size); \
}
// Calls test_segmented_view<double,Kokkos::Threads> once with `size`.
#define THREADS_SEGMENTEDVIEW_TEST( size ) \
TEST_F( threads, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::Threads>(size); \
}
// Concrete threads tests generated from the macros above.
// NOTE(review): only the "far" insert variant is instantiated here.
THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false)
THREADS_FAILED_INSERT_TEST( 10000, 1000 )
THREADS_DEEP_COPY( 10000, 1 )
THREADS_VECTOR_COMBINE_TEST( 10 )
THREADS_VECTOR_COMBINE_TEST( 3057 )
THREADS_DUALVIEW_COMBINE_TEST( 10 )
THREADS_SEGMENTEDVIEW_TEST( 10000 )
// The helper macros are only needed for the instantiations above; remove
// them so they do not leak out of this file.
#undef THREADS_INSERT_TEST
#undef THREADS_FAILED_INSERT_TEST
#undef THREADS_ASSIGNEMENT_TEST
#undef THREADS_DEEP_COPY
#undef THREADS_VECTOR_COMBINE_TEST
#undef THREADS_DUALVIEW_COMBINE_TEST
#undef THREADS_SEGMENTEDVIEW_TEST
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */

View File

@ -1,313 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_UNORDERED_MAP_HPP
#define KOKKOS_TEST_UNORDERED_MAP_HPP
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
/// Reduction functor that performs `inserts` insertions into an unordered map
/// and counts how many of them report failure.
///
/// Key generation for work index i:
///   - Near == true  : key = i / collisions          (equal keys are adjacent)
///   - Near == false : key = i % (inserts/collisions) (equal keys are spread out)
/// The inserted value is always the work index i.
template <typename MapType, bool Near = false>
struct TestInsert
{
  typedef MapType map_type;
  typedef typename map_type::execution_space execution_space;
  typedef uint32_t value_type;   // reduction value: number of failed inserts

  map_type map;
  uint32_t inserts;
  uint32_t collisions;

  TestInsert( map_type arg_map, uint32_t arg_inserts, uint32_t arg_collisions)
    : map(arg_map)
    , inserts(arg_inserts)
    , collisions(arg_collisions)
  {}

  /// Runs the insertion kernel between two execution-space fences.
  ///
  /// When rehash_on_fail is true the kernel is repeated, growing the map after
  /// every pass that had failures, until a pass completes without any failed
  /// insert. When it is false the kernel runs exactly once.
  void testit( bool rehash_on_fail = true )
  {
    execution_space::fence();

    for (;;) {
      uint32_t num_failed = 0;
      Kokkos::parallel_reduce(inserts, *this, num_failed);

      const bool must_retry = rehash_on_fail && num_failed > 0u;
      if (!must_retry) {
        break;
      }

      // Grow by 3/20 (15%) of the current capacity plus num_failed/collisions.
      const uint32_t grown_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + num_failed/collisions ;
      map.rehash( grown_capacity );
    }

    execution_space::fence();
  }

  /// Reduction identity: no failures yet.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & failed_count ) const
  {
    failed_count = 0;
  }

  /// Combines two partial failure counts.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & failed_count, const volatile value_type & count ) const
  {
    failed_count += count;
  }

  /// Inserts (key(i), i) and bumps the failure count if the insert failed.
  KOKKOS_INLINE_FUNCTION
  void operator()(uint32_t i, value_type & failed_count) const
  {
    uint32_t key;
    if (Near) {
      key = i/collisions;
    }
    else {
      key = i%(inserts/collisions);
    }

    if (map.insert(key,i).failed()) {
      ++failed_count;
    }
  }
};
/// Parallel-for functor that issues m_num_erase erase calls on an unordered map.
///
/// The key erased by work index i mirrors the key pattern of TestInsert:
///   - Near == true  : i / m_num_duplicates
///   - Near == false : i % (m_num_erase / m_num_duplicates)
template <typename MapType, bool Near>
struct TestErase
{
  typedef TestErase<MapType, Near> self_type;
  typedef MapType map_type;
  typedef typename MapType::execution_space execution_space;

  map_type m_map;
  uint32_t m_num_erase;
  uint32_t m_num_duplicates;

  TestErase(map_type map, uint32_t num_erases, uint32_t num_duplicates)
    : m_map(map)
    , m_num_erase(num_erases)
    , m_num_duplicates(num_duplicates)
  {}

  /// Launches the erase kernel, fencing the execution space before and after.
  void testit()
  {
    execution_space::fence();
    Kokkos::parallel_for(m_num_erase, *this);
    execution_space::fence();
  }

  /// Erases the key associated with work index i; the result of erase() is ignored.
  KOKKOS_INLINE_FUNCTION
  void operator()(typename execution_space::size_type i) const
  {
    m_map.erase( Near ? i/m_num_duplicates
                      : i%(m_num_erase/m_num_duplicates) );
  }
};
/// Reduction functor that checks which keys are present in an unordered map.
///
/// Every index i in [0, capacity) is probed with exists(i). Keys below
/// m_max_key = ceil(num_inserts / num_duplicates) are expected to be present,
/// all others are expected to be absent; each mismatch counts as one error.
template <typename MapType>
struct TestFind
{
  typedef MapType map_type;
  typedef typename MapType::execution_space::execution_space execution_space;
  typedef uint32_t value_type;   // reduction value: number of mismatches

  map_type m_map;
  uint32_t m_num_insert;
  uint32_t m_num_duplicates;
  uint32_t m_max_key;

  TestFind(map_type map, uint32_t num_inserts, uint32_t num_duplicates)
    : m_map(map)
    , m_num_insert(num_inserts)
    , m_num_duplicates(num_duplicates)
    , m_max_key( ((num_inserts + num_duplicates) - 1)/num_duplicates )
  {}

  /// Runs the check over the whole map capacity and stores the mismatch count
  /// in `errors`. The execution space is fenced before and after the kernel.
  void testit(value_type &errors)
  {
    execution_space::execution_space::fence();
    Kokkos::parallel_reduce(m_map.capacity(), *this, errors);
    execution_space::execution_space::fence();
  }

  /// Reduction identity.
  KOKKOS_INLINE_FUNCTION
  static void init( value_type & dst)
  {
    dst = 0;
  }

  /// Combines two partial mismatch counts.
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & dst, const volatile value_type & src)
  {
    dst += src;
  }

  /// Counts one error when presence of key i disagrees with the expectation.
  KOKKOS_INLINE_FUNCTION
  void operator()(typename execution_space::size_type i, value_type & errors) const
  {
    const bool should_exist = (i < m_max_key);
    const bool does_exist = m_map.exists(i);

    if (should_exist != does_exist) {
      ++errors;
    }
  }
};
} // namespace Impl
template <typename Device>
void test_insert( uint32_t num_nodes , uint32_t num_inserts , uint32_t num_duplicates , bool near )
{
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
typedef Kokkos::UnorderedMap<const uint32_t,const uint32_t, Device> const_map_type;
const uint32_t expected_inserts = (num_inserts + num_duplicates -1u) / num_duplicates;
map_type map;
map.rehash(num_nodes,false);
if (near) {
Impl::TestInsert<map_type,true> test_insert(map, num_inserts, num_duplicates);
test_insert.testit();
} else
{
Impl::TestInsert<map_type,false> test_insert(map, num_inserts, num_duplicates);
test_insert.testit();
}
const bool print_list = false;
if (print_list) {
Kokkos::Impl::UnorderedMapPrint<map_type> f(map);
f.apply();
}
const uint32_t map_size = map.size();
ASSERT_FALSE( map.failed_insert());
{
EXPECT_EQ(expected_inserts, map_size);
{
uint32_t find_errors = 0;
Impl::TestFind<const_map_type> test_find(map, num_inserts, num_duplicates);
test_find.testit(find_errors);
EXPECT_EQ( 0u, find_errors);
}
map.begin_erase();
Impl::TestErase<map_type,false> test_erase(map, num_inserts, num_duplicates);
test_erase.testit();
map.end_erase();
EXPECT_EQ(0u, map.size());
}
}
template <typename Device>
void test_failed_insert( uint32_t num_nodes)
{
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
map_type map(num_nodes);
Impl::TestInsert<map_type> test_insert(map, 2u*num_nodes, 1u);
test_insert.testit(false /*don't rehash on fail*/);
Device::execution_space::fence();
EXPECT_TRUE( map.failed_insert() );
}
template <typename Device>
void test_deep_copy( uint32_t num_nodes )
{
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
typedef Kokkos::UnorderedMap<const uint32_t, const uint32_t, Device> const_map_type;
typedef typename map_type::HostMirror host_map_type ;
// typedef Kokkos::UnorderedMap<uint32_t, uint32_t, typename Device::host_mirror_execution_space > host_map_type;
map_type map;
map.rehash(num_nodes,false);
{
Impl::TestInsert<map_type> test_insert(map, num_nodes, 1);
test_insert.testit();
ASSERT_EQ( map.size(), num_nodes);
ASSERT_FALSE( map.failed_insert() );
{
uint32_t find_errors = 0;
Impl::TestFind<map_type> test_find(map, num_nodes, 1);
test_find.testit(find_errors);
EXPECT_EQ( find_errors, 0u);
}
}
host_map_type hmap;
Kokkos::deep_copy(hmap, map);
ASSERT_EQ( map.size(), hmap.size());
ASSERT_EQ( map.capacity(), hmap.capacity());
{
uint32_t find_errors = 0;
Impl::TestFind<host_map_type> test_find(hmap, num_nodes, 1);
test_find.testit(find_errors);
EXPECT_EQ( find_errors, 0u);
}
map_type mmap;
Kokkos::deep_copy(mmap, hmap);
const_map_type cmap = mmap;
EXPECT_EQ( cmap.size(), num_nodes);
{
uint32_t find_errors = 0;
Impl::TestFind<const_map_type> test_find(cmap, num_nodes, 1);
test_find.testit(find_errors);
EXPECT_EQ( find_errors, 0u);
}
}
} // namespace Test
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP

View File

@ -1,131 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_VECTOR_HPP
#define KOKKOS_TEST_VECTOR_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
/// Runs the same sequence of vector operations on std::vector and on
/// Kokkos::vector and stores a checksum of each run in `reference` and
/// `result` respectively, so a caller can compare the two.
template <typename Scalar, class Device>
struct test_vector_combinations
{
  typedef test_vector_combinations<Scalar,Device> self_type;
  typedef Scalar scalar_type;
  typedef Device execution_space;

  Scalar reference;   // checksum from std::vector<Scalar>
  Scalar result;      // checksum from Kokkos::vector<Scalar,Device>

  /// Sums all current entries of `v` using operator[] and size().
  template <typename Vector>
  static Scalar sum_entries(Vector & v)
  {
    Scalar total = 0;
    for (unsigned int i = 0; i < v.size(); i++) {
      total += v[i];
    }
    return total;
  }

  /// Exercises construction, push_back, resize, element access, assign and
  /// reserve on `Vector`, summing the contents after each assign/reserve step,
  /// and folds the three sums into one value.
  ///
  /// NOTE(review): for Scalar = int and large n the products in the returned
  /// expression can exceed the range of int; the comparison made by the caller
  /// then relies on both runs overflowing the same way. Worth confirming.
  template <typename Vector>
  Scalar run_me(unsigned int n){
    Vector a(n,1);

    a.push_back(2);
    a.resize(n+4);
    a[n+1] = 3;
    a[n+2] = 4;
    a[n+3] = 5;

    // Remember three entries, overwrite everything, then restore them.
    Scalar saved_2 = a[2];
    Scalar saved_n = a[n];
    Scalar saved_n1 = a[n+1];

    a.assign(n+2,-1);

    a[2] = saved_2;
    a[n] = saved_n;
    a[n+1] = saved_n1;

    Scalar sum_after_restore = sum_entries(a);

    a.assign(n+1,-2);
    Scalar sum_after_assign = sum_entries(a);

    // reserve() should not change the contents, so this sum repeats the previous one.
    a.reserve(n+10);
    Scalar sum_after_reserve = sum_entries(a);

    return (sum_after_restore*sum_after_assign+sum_after_reserve)*sum_after_assign+sum_after_restore*sum_after_reserve;
  }

  /// Computes both checksums for vectors built from `size`.
  test_vector_combinations(unsigned int size)
  {
    reference = run_me<std::vector<Scalar> >(size);
    result = run_me<Kokkos::vector<Scalar,Device> >(size);
  }
};
} // namespace Impl
template <typename Scalar, typename Device>
void test_vector_combinations(unsigned int size)
{
Impl::test_vector_combinations<Scalar,Device> test(size);
ASSERT_EQ( test.reference, test.result);
}
} // namespace Test
#endif //KOKKOS_TEST_VECTOR_HPP

View File

@ -1,50 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
// Test-runner entry point: hands the command line to GoogleTest (which may
// consume its own flags from argc/argv) and returns the result of running
// every registered test as the process exit status.
int main(int argc, char *argv[]) {
  ::testing::InitGoogleTest(&argc,argv);

  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}

View File

@ -1,66 +0,0 @@
# Builds and runs the Kokkos core performance tests.

# Locations of the Kokkos source tree and the bundled GoogleTest sources,
# relative to this directory.
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
# Look for .cpp sources in the core perf_test directory.
vpath %.cpp ${KOKKOS_PATH}/core/perf_test
# First target in the file, hence the default goal: build every target.
default: build_all
echo "End Build"
# Shared Kokkos build settings (supplies the KOKKOS_* variables used below).
include $(KOKKOS_PATH)/Makefile.kokkos
# Compiler selection: force nvcc_wrapper for CUDA builds; otherwise g++ unless
# the variables are already set.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
# Add the GoogleTest and perf_test directories to the include path.
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test
# Accumulators for executables (TARGETS) and run-test goals (TEST_TARGETS).
TEST_TARGETS =
TARGETS =
# Main performance test executable.
OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o
TARGETS += KokkosCore_PerformanceTest
TEST_TARGETS += test-performance
# Atomics performance test executable.
OBJ_ATOMICS = test_atomic.o
TARGETS += KokkosCore_PerformanceTest_Atomics
TEST_TARGETS += test-atomic
# Link rules.
KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest
KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics
# Run rules: each builds its executable if needed, then executes it.
test-performance: KokkosCore_PerformanceTest
./KokkosCore_PerformanceTest
test-atomic: KokkosCore_PerformanceTest_Atomics
./KokkosCore_PerformanceTest_Atomics
# Aggregate goals.
build_all: $(TARGETS)
test: $(TEST_TARGETS)
# Remove objects and executables; kokkos-clean is not defined in this file
# (presumably it comes from Makefile.kokkos).
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
# GoogleTest is compiled from its single amalgamated source file.
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -1,309 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BLAS_KERNELS_HPP
#define KOKKOS_BLAS_KERNELS_HPP
namespace Kokkos {
// Forward declarations of the BLAS-style functors defined later in this header.
// In each case the Device parameter defaults to the execution_space of the
// vector view type.

// Dot product of two vectors (reduction functor).
template< class ConstVectorType ,
class Device = typename ConstVectorType::execution_space >
struct Dot ;
// Dot product of a vector with itself (reduction functor).
template< class ConstVectorType ,
class Device = typename ConstVectorType::execution_space >
struct DotSingle ;
// In-place scaling of a vector: Y *= alpha.
template< class ConstScalarType ,
class VectorType ,
class Device = typename VectorType::execution_space >
struct Scale ;
// Y = alpha * X + beta * Y.
template< class ConstScalarType ,
class ConstVectorType ,
class VectorType ,
class Device = typename VectorType::execution_space >
struct AXPBY ;
/** \brief Y = alpha * X + beta * Y */
template< class ConstScalarType ,
class ConstVectorType ,
class VectorType >
void axpby( const ConstScalarType & alpha ,
const ConstVectorType & X ,
const ConstScalarType & beta ,
const VectorType & Y )
{
typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;
parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
}
/** \brief Y *= alpha */
template< class ConstScalarType ,
class VectorType >
void scale( const ConstScalarType & alpha , const VectorType & Y )
{
typedef Scale< ConstScalarType , VectorType > functor ;
parallel_for( Y.dimension_0() , functor( alpha , Y ) );
}
template< class ConstVectorType ,
class Finalize >
void dot( const ConstVectorType & X ,
const ConstVectorType & Y ,
const Finalize & finalize )
{
typedef Dot< ConstVectorType > functor ;
parallel_reduce( X.dimension_0() , functor( X , Y ) , finalize );
}
template< class ConstVectorType ,
class Finalize >
void dot( const ConstVectorType & X ,
const Finalize & finalize )
{
typedef DotSingle< ConstVectorType > functor ;
parallel_reduce( X.dimension_0() , functor( X ) , finalize );
}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief  Reduction functor accumulating sum_i X[i] * Y[i] in a double. */
template< class Type , class Device >
struct Dot
{
  typedef typename Device::execution_space execution_space ;

  // Rank check on the view type (presumably a compile error unless Type::Rank == 1).
  typedef typename Impl::StaticAssertSame<
            Impl::unsigned_< 1 > ,
            Impl::unsigned_< Type::Rank > >::type
    ok_rank ;

  /* Disabled device-consistency check:
  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename Type::execution_space >::type ok_device ;*/

  typedef double value_type ;

  // The operands are held through the view's const_type; the #else branch
  // keeps the alternative of storing the view type unchanged.
#if 1
  typename Type::const_type X ;
  typename Type::const_type Y ;
#else
  Type X ;
  Type Y ;
#endif

  Dot( const Type & lhs , const Type & rhs )
    : X(lhs)
    , Y(rhs)
  { }

  /// Adds the product of the i-th entries to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i , value_type & update ) const
  {
    update += X[i] * Y[i];
  }

  /// Combines two partial sums.
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & update ,
                    const volatile value_type & source )
  {
    update += source;
  }

  /// Reduction identity.
  KOKKOS_INLINE_FUNCTION
  static void init( value_type & update )
  {
    update = 0 ;
  }
};
/** \brief  Reduction functor accumulating sum_i X[i] * X[i] in a double. */
template< class Type , class Device >
struct DotSingle
{
  typedef typename Device::execution_space execution_space ;

  // Rank check on the view type (presumably a compile error unless Type::Rank == 1).
  typedef typename Impl::StaticAssertSame<
            Impl::unsigned_< 1 > ,
            Impl::unsigned_< Type::Rank > >::type
    ok_rank ;

  /* Disabled device-consistency check:
  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename Type::execution_space >::type ok_device ;*/

  typedef double value_type ;

  // The operand is held through the view's const_type; the #else branch keeps
  // the alternative of storing the view type unchanged.
#if 1
  typename Type::const_type X ;
#else
  Type X ;
#endif

  DotSingle( const Type & vec )
    : X(vec)
  {}

  /// Adds the square of the i-th entry to the running sum.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i , value_type & update ) const
  {
    const typename Type::value_type & x = X[i];
    update += x * x ;
  }

  /// Combines two partial sums.
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & update ,
                    const volatile value_type & source )
  {
    update += source;
  }

  /// Reduction identity.
  KOKKOS_INLINE_FUNCTION
  static void init( value_type & update )
  {
    update = 0 ;
  }
};
/** \brief  Parallel-for functor computing Y[i] *= alpha() for every i.
 *
 *  alpha is itself a view that is read by calling it with no index.
 */
template< class ScalarType , class VectorType , class Device>
struct Scale
{
  typedef typename Device::execution_space execution_space ;

  /* Disabled device-consistency checks:
  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename ScalarType::execution_space >::type
      ok_scalar_device ;
  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename VectorType::execution_space >::type
      ok_vector_device ;*/

  // Rank checks (presumably compile errors unless the scalar view has rank 0
  // and the vector view has rank 1).
  typedef typename Impl::StaticAssertSame<
            Impl::unsigned_< 0 > ,
            Impl::unsigned_< ScalarType::Rank > >::type
    ok_scalar_rank ;

  typedef typename Impl::StaticAssertSame<
            Impl::unsigned_< 1 > ,
            Impl::unsigned_< VectorType::Rank > >::type
    ok_vector_rank ;

  // The scale factor is held through the view's const_type; the #else branch
  // keeps the alternative of storing the view type unchanged.
#if 1
  typename ScalarType::const_type alpha ;
#else
  ScalarType alpha ;
#endif

  VectorType Y ;

  Scale( const ScalarType & factor , const VectorType & vec )
    : alpha( factor )
    , Y( vec )
  {}

  /// Scales the i-th entry of Y in place.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i ) const
  {
    Y[i] *= alpha() ;
  }
};
/** \brief  Parallel-for functor computing Y[i] = alpha() * X[i] + beta() * Y[i].
 *
 *  alpha and beta are views that are read by calling them with no index.
 */
template< class ScalarType ,
          class ConstVectorType ,
          class VectorType,
          class Device>
struct AXPBY
{
  typedef typename Device::execution_space execution_space ;

  /* Disabled device-consistency checks:
  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename ScalarType::execution_space >::type
      ok_scalar_device ;
  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename ConstVectorType::execution_space >::type
      ok_const_vector_device ;
  typedef typename
    Impl::StaticAssertSame< execution_space ,
                            typename VectorType::execution_space >::type
      ok_vector_device ;*/

  // Rank checks (presumably compile errors unless the scalar view has rank 0
  // and both vector views have rank 1).
  typedef typename Impl::StaticAssertSame<
            Impl::unsigned_< 0 > ,
            Impl::unsigned_< ScalarType::Rank > >::type
    ok_scalar_rank ;

  typedef typename Impl::StaticAssertSame<
            Impl::unsigned_< 1 > ,
            Impl::unsigned_< ConstVectorType::Rank > >::type
    ok_const_vector_rank ;

  typedef typename Impl::StaticAssertSame<
            Impl::unsigned_< 1 > ,
            Impl::unsigned_< VectorType::Rank > >::type
    ok_vector_rank ;

  // Read-only operands are held through the views' const_type; the #else
  // branch keeps the alternative of storing the view types unchanged.
#if 1
  typename ScalarType::const_type alpha , beta ;
  typename ConstVectorType::const_type X ;
#else
  ScalarType alpha , beta ;
  ConstVectorType X ;
#endif

  VectorType Y ;

  AXPBY( const ScalarType & a ,
         const ConstVectorType & x ,
         const ScalarType & b ,
         const VectorType & y )
    : alpha( a )
    , beta( b )
    , X( x )
    , Y( y )
  {}

  /// Updates the i-th entry of Y.
  KOKKOS_INLINE_FUNCTION
  void operator()( int i ) const
  {
    Y[i] = alpha() * X[i] + beta() * Y[i] ;
  }
};
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_BLAS_KERNELS_HPP */

View File

@ -1,189 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <impl/Kokkos_Timer.hpp>
#include <PerfTestHexGrad.hpp>
#include <PerfTestBlasKernels.hpp>
#include <PerfTestGramSchmidt.hpp>
#include <PerfTestDriver.hpp>
namespace Test {
/// GoogleTest fixture for the CUDA performance tests.
///
/// The host execution space and then the CUDA device selected by
/// SelectDevice(0) are initialized in SetUpTestCase; TearDownTestCase
/// finalizes them in the reverse order.
class cuda : public ::testing::Test {
protected:
  static void SetUpTestCase()
  {
    Kokkos::HostSpace::execution_space::initialize();
    Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
  }

  static void TearDownTestCase()
  {
    Kokkos::Cuda::finalize();
    Kokkos::HostSpace::execution_space::finalize();
  }
};
// Runs the HexGrad timing driver on Kokkos::Cuda for exponents 10 (inclusive)
// to 20 (exclusive) and expects it not to throw.
TEST_F( cuda, hexgrad )
{
  const int exp_beg = 10;
  const int exp_end = 20;

  EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( exp_beg , exp_end, "Kokkos::Cuda" ) );
}
// Runs the Gram-Schmidt timing driver on Kokkos::Cuda for exponents 10
// (inclusive) to 20 (exclusive) and expects it not to throw.
TEST_F( cuda, gramschmidt )
{
  const int exp_beg = 10;
  const int exp_end = 20;

  EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( exp_beg , exp_end, "Kokkos::Cuda" ) );
}
namespace {
template <typename T>
struct TextureFetch
{
typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type;
typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type;
typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type;
typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type;
struct FillArray
{
array_type m_array;
FillArray( const array_type & array )
: m_array(array)
{}
void apply() const
{
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.size()), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i) const { m_array(i) = i; }
};
struct RandomIndexes
{
index_array_type m_indexes;
typename index_array_type::HostMirror m_host_indexes;
RandomIndexes( const index_array_type & indexes)
: m_indexes(indexes)
, m_host_indexes(Kokkos::create_mirror(m_indexes))
{}
void apply() const
{
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.size()), *this);
//random shuffle
Kokkos::HostSpace::execution_space::fence();
std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.size());
Kokkos::deep_copy(m_indexes,m_host_indexes);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i) const { m_host_indexes(i) = i; }
};
struct RandomReduce
{
const_array_type m_array;
const_index_array_type m_indexes;
RandomReduce( const const_array_type & array, const const_index_array_type & indexes)
: m_array(array)
, m_indexes(indexes)
{}
void apply(T & reduce) const
{
Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.size()), *this, reduce);
}
KOKKOS_INLINE_FUNCTION
void operator()(int i, T & reduce) const
{ reduce += m_array(m_indexes(i)); }
};
static void run(int size, double & reduce_time, T &reduce)
{
array_type array("array",size);
index_array_type indexes("indexes",size);
{ FillArray f(array); f.apply(); }
{ RandomIndexes f(indexes); f.apply(); }
Kokkos::Cuda::fence();
Kokkos::Impl::Timer timer;
for (int j=0; j<10; ++j) {
RandomReduce f(array,indexes);
f.apply(reduce);
}
Kokkos::Cuda::fence();
reduce_time = timer.seconds();
}
};
} // unnamed namespace
// Times TextureFetch<double>::run for array lengths 2^1 through 2^27 and
// prints one line per length. The reduction value itself is not checked.
TEST_F( cuda, texture_double )
{
  printf("Random reduce of double through texture fetch\n");

  int exponent = 1;
  while (exponent <= 27) {
    const int size = 1<<exponent;

    double time = 0;
    double reduce = 0;
    TextureFetch<double>::run(size,time,reduce);

    printf(" time = %1.3e size = 2^%d\n", time, exponent);

    ++exponent;
  }
}
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */

View File

@ -1,152 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <string>
// mfh 06 Jun 2013: This macro doesn't work like one might think it
// should. It doesn't take the template parameter DeviceType and
// print its actual type name; it just literally prints out
// "DeviceType". I've worked around this below without using the
// macro, so I'm commenting out the macro to avoid compiler complaints
// about an unused macro.
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
//------------------------------------------------------------------------
namespace Test {
// Number of times each problem size is timed by the run_test_* drivers below.
enum { NUMBER_OF_TRIALS = 5 };
/// Times HexGrad< DeviceType >::test over a range of problem sizes and prints
/// one comma-separated row per size to std::cout:
///
///   "HexGrad< double , <deviceTypeName> >" , <size> , <min seconds> , <min seconds / size>
///
/// Every size is timed NUMBER_OF_TRIALS times and the fastest trial is the one
/// reported.
///
/// \param exp_beg         first exponent (inclusive); the problem size is 1 << exponent
/// \param exp_end         last exponent (exclusive)
/// \param deviceTypeName  device name to embed in the row label
template< class DeviceType >
void run_test_hexgrad( int exp_beg , int exp_end, const char deviceTypeName[] )
{
  // mfh 06 Jun 2013: stringizing the DeviceType template parameter with a
  // macro only yields the literal text "DeviceType", not the actual type
  // name, so the caller passes the device name explicitly.
  std::string label_hexgrad ;
  label_hexgrad.append( "\"HexGrad< double , " );
  label_hexgrad.append( deviceTypeName );
  label_hexgrad.append( " >\"" );

  for (int i = exp_beg ; i < exp_end ; ++i) {
    const int parallel_work_length = 1<<i;

    // Only the fastest trial is reported, so only the minimum is tracked.
    double min_seconds = 0.0 ;

    for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
      const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ;

      if ( 0 == j || seconds < min_seconds ) min_seconds = seconds ;
    }

    std::cout << label_hexgrad
              << " , " << parallel_work_length
              << " , " << min_seconds
              << " , " << ( min_seconds / parallel_work_length )
              << std::endl ;
  }
}
/// Times ModifiedGramSchmidt< double , DeviceType >::test over a range of
/// problem sizes and prints one comma-separated row per size to std::cout:
///
///   "GramSchmidt< double , <deviceTypeName> >" , <size> , <min seconds> , <min seconds / size>
///
/// Every size is timed NUMBER_OF_TRIALS times and the fastest trial is the one
/// reported. The second argument passed to test() is fixed at 32.
///
/// \param exp_beg         first exponent (inclusive); the problem size is 1 << exponent
/// \param exp_end         last exponent (exclusive)
/// \param deviceTypeName  device name to embed in the row label
template< class DeviceType >
void run_test_gramschmidt( int exp_beg , int exp_end, const char deviceTypeName[] )
{
  // mfh 06 Jun 2013: stringizing the DeviceType template parameter with a
  // macro only yields the literal text "DeviceType", not the actual type
  // name, so the caller passes the device name explicitly.
  std::string label_gramschmidt ;
  label_gramschmidt.append( "\"GramSchmidt< double , " );
  label_gramschmidt.append( deviceTypeName );
  label_gramschmidt.append( " >\"" );

  for (int i = exp_beg ; i < exp_end ; ++i) {
    const int parallel_work_length = 1<<i;

    // Only the fastest trial is reported, so only the minimum is tracked.
    double min_seconds = 0.0 ;

    for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
      const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ;

      if ( 0 == j || seconds < min_seconds ) min_seconds = seconds ;
    }

    std::cout << label_gramschmidt
              << " , " << parallel_work_length
              << " , " << min_seconds
              << " , " << ( min_seconds / parallel_work_length )
              << std::endl ;
  }
}
}

View File

@ -1,231 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cmath>
#include <PerfTestBlasKernels.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Test {
// Reduction functor producing the norm of a vector and its reciprocal.
// Reduction   : result = dot( Q(:,j) , Q(:,j) );   (done by the DotSingle base)
// PostProcess : result = sqrt( result ); R(j,j) = result ;
//               inv = ( 0 < result ) ? 1 / result : 0 ;
template< class VectorView , class ValueView >
struct InvNorm2 : public Kokkos::DotSingle< VectorView > {

  typedef typename Kokkos::DotSingle< VectorView >::value_type value_type ;

  ValueView Rjj ;  // receives the norm
  ValueView inv ;  // receives the reciprocal of the norm (0 if the norm is not positive)

  InvNorm2( const VectorView & x ,
            const ValueView  & norm_out ,
            const ValueView  & inv_out )
    : Kokkos::DotSingle< VectorView >( x )
    , Rjj( norm_out )
    , inv( inv_out )
    {}

  // Post-processing step applied to the reduced dot product.
  KOKKOS_INLINE_FUNCTION
  void final( value_type & result ) const
  {
    result = sqrt( result );
    Rjj() = result ;
    // Guard against division by zero for a null vector.
    if ( 0 < result ) { inv() = 1.0 / result ; }
    else              { inv() = 0 ; }
  }
};
template< class VectorView , class ValueView >
inline
void invnorm2( const VectorView & x ,
const ValueView & r ,
const ValueView & r_inv )
{
Kokkos::parallel_reduce( x.dimension_0() , InvNorm2< VectorView , ValueView >( x , r , r_inv ) );
}
// Dot-product reduction functor that also stores the negated result.
// PostProcess : R(j,k) = result ; tmp = - result ;
template< class VectorView , class ValueView >
struct DotM : public Kokkos::Dot< VectorView > {

  typedef typename Kokkos::Dot< VectorView >::value_type value_type ;

  ValueView Rjk ;  // receives the dot product
  ValueView tmp ;  // receives the negated dot product

  DotM( const VectorView & x ,
        const VectorView & y ,
        const ValueView  & dot_out ,
        const ValueView  & neg_out )
    : Kokkos::Dot< VectorView >( x , y )
    , Rjk( dot_out )
    , tmp( neg_out )
    {}

  // Post-processing step applied to the reduced dot product.
  KOKKOS_INLINE_FUNCTION
  void final( value_type & result ) const
  {
    Rjk() = result ;
    tmp() = - result ;
  }
};
template< class VectorView , class ValueView >
inline
void dot_neg( const VectorView & x ,
const VectorView & y ,
const ValueView & r ,
const ValueView & r_neg )
{
Kokkos::parallel_reduce( x.dimension_0() , DotM< VectorView , ValueView >( x , y , r , r_neg ) );
}
template< typename Scalar , class DeviceType >
struct ModifiedGramSchmidt
{
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
typedef Kokkos::View< Scalar** ,
Kokkos::LayoutLeft ,
execution_space > multivector_type ;
typedef Kokkos::View< Scalar* ,
Kokkos::LayoutLeft ,
execution_space > vector_type ;
typedef Kokkos::View< Scalar ,
Kokkos::LayoutLeft ,
execution_space > value_view ;
multivector_type Q ;
multivector_type R ;
static double factorization( const multivector_type Q_ ,
const multivector_type R_ )
{
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
using Kokkos::Experimental::ALL ;
#else
const Kokkos::ALL ALL ;
#endif
const size_type count = Q_.dimension_1();
value_view tmp("tmp");
value_view one("one");
Kokkos::deep_copy( one , (Scalar) 1 );
Kokkos::Impl::Timer timer ;
for ( size_type j = 0 ; j < count ; ++j ) {
// Reduction : tmp = dot( Q(:,j) , Q(:,j) );
// PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ;
const vector_type Qj = Kokkos::subview( Q_ , ALL , j );
const value_view Rjj = Kokkos::subview( R_ , j , j );
invnorm2( Qj , Rjj , tmp );
// Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ;
Kokkos::scale( tmp , Qj );
for ( size_t k = j + 1 ; k < count ; ++k ) {
const vector_type Qk = Kokkos::subview( Q_ , ALL , k );
const value_view Rjk = Kokkos::subview( R_ , j , k );
// Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) );
// PostProcess : tmp = - R(j,k);
dot_neg( Qj , Qk , Rjk , tmp );
// Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j)
Kokkos::axpby( tmp , Qj , one , Qk );
}
}
execution_space::fence();
return timer.seconds();
}
//--------------------------------------------------------------------------
static double test( const size_t length ,
const size_t count ,
const size_t iter = 1 )
{
multivector_type Q_( "Q" , length , count );
multivector_type R_( "R" , count , count );
typename multivector_type::HostMirror A =
Kokkos::create_mirror( Q_ );
// Create and fill A on the host
for ( size_type j = 0 ; j < count ; ++j ) {
for ( size_type i = 0 ; i < length ; ++i ) {
A(i,j) = ( i + 1 ) * ( j + 1 );
}
}
double dt_min = 0 ;
for ( size_t i = 0 ; i < iter ; ++i ) {
Kokkos::deep_copy( Q_ , A );
// A = Q * R
const double dt = factorization( Q_ , R_ );
if ( 0 == i ) dt_min = dt ;
else dt_min = dt < dt_min ? dt : dt_min ;
}
return dt_min ;
}
};
}

View File

@ -1,268 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
namespace Test {
// Performance-test functor: for every element, computes the eight-node
// gradient operator in each of the three space directions from the
// element's node coordinates.
//
// DeviceType      : Kokkos execution space the kernel runs on.
// CoordScalarType : scalar type of the input coordinates.
// GradScalarType  : scalar type of the computed gradient operator.
template< class DeviceType ,
          typename CoordScalarType = double ,
          typename GradScalarType = float >
struct HexGrad
{
  typedef DeviceType execution_space ;
  typedef typename execution_space::size_type size_type ;

  typedef HexGrad<DeviceType,CoordScalarType,GradScalarType> self_type;

  // 3D array : ( ParallelWork , Space , Node )

  enum { NSpace = 3 , NNode = 8 };

  typedef Kokkos::View< CoordScalarType*[NSpace][NNode] , execution_space >
    elem_coord_type ;

  typedef Kokkos::View< GradScalarType*[NSpace][NNode] , execution_space >
    elem_grad_type ;

  elem_coord_type coords ;   // input  : node coordinates per element
  elem_grad_type  grad_op ;  // output : gradient operator per element

  enum { FLOPS = 318 }; // = 3 * ( 18 + 8 * 11 ) };
  enum { READS = 18 };
  enum { WRITES = 18 };

  HexGrad( const elem_coord_type & arg_coords ,
           const elem_grad_type & arg_grad_op )
    : coords( arg_coords )
    , grad_op( arg_grad_op )
    {}

  // Compute the eight gradient components for one space direction from
  // the node coordinates of the other two directions (passed as x and z).
  KOKKOS_INLINE_FUNCTION static
  void grad( const CoordScalarType x[] ,
             const CoordScalarType z[] ,
             GradScalarType grad_y[] )
  {
    // Coordinate differences between node pairs (Rab = x[a-1] - x[b-1]).
    const GradScalarType R42=(x[3] - x[1]);
    const GradScalarType R52=(x[4] - x[1]);
    const GradScalarType R54=(x[4] - x[3]);

    const GradScalarType R63=(x[5] - x[2]);
    const GradScalarType R83=(x[7] - x[2]);
    const GradScalarType R86=(x[7] - x[5]);

    const GradScalarType R31=(x[2] - x[0]);
    const GradScalarType R61=(x[5] - x[0]);
    const GradScalarType R74=(x[6] - x[3]);

    const GradScalarType R72=(x[6] - x[1]);
    const GradScalarType R75=(x[6] - x[4]);
    const GradScalarType R81=(x[7] - x[0]);

    // Sums of differences reused by several components below.
    const GradScalarType t1=(R63 + R54);
    const GradScalarType t2=(R61 + R74);
    const GradScalarType t3=(R72 + R81);

    const GradScalarType t4 =(R86 + R42);
    const GradScalarType t5 =(R83 + R52);
    const GradScalarType t6 =(R75 + R31);

    // Calculate Y gradient from X and Z data

    grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
    grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
    grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
    grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
    grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
    grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
    grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
    grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
  }

  // Kernel body: load the coordinates of element 'ielem', compute the
  // gradient for each space direction and store it in grad_op.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type ielem ) const
  {
    GradScalarType g[NNode] ;

    const CoordScalarType x[NNode] = {
      coords(ielem,0,0),
      coords(ielem,0,1),
      coords(ielem,0,2),
      coords(ielem,0,3),
      coords(ielem,0,4),
      coords(ielem,0,5),
      coords(ielem,0,6),
      coords(ielem,0,7)
    };

    const CoordScalarType y[NNode] = {
      coords(ielem,1,0),
      coords(ielem,1,1),
      coords(ielem,1,2),
      coords(ielem,1,3),
      coords(ielem,1,4),
      coords(ielem,1,5),
      coords(ielem,1,6),
      coords(ielem,1,7)
    };

    const CoordScalarType z[NNode] = {
      coords(ielem,2,0),
      coords(ielem,2,1),
      coords(ielem,2,2),
      coords(ielem,2,3),
      coords(ielem,2,4),
      coords(ielem,2,5),
      coords(ielem,2,6),
      coords(ielem,2,7)
    };

    // Direction 0 from ( z , y )
    grad( z , y , g );

    grad_op(ielem,0,0) = g[0];
    grad_op(ielem,0,1) = g[1];
    grad_op(ielem,0,2) = g[2];
    grad_op(ielem,0,3) = g[3];
    grad_op(ielem,0,4) = g[4];
    grad_op(ielem,0,5) = g[5];
    grad_op(ielem,0,6) = g[6];
    grad_op(ielem,0,7) = g[7];

    // Direction 1 from ( x , z )
    grad( x , z , g );

    grad_op(ielem,1,0) = g[0];
    grad_op(ielem,1,1) = g[1];
    grad_op(ielem,1,2) = g[2];
    grad_op(ielem,1,3) = g[3];
    grad_op(ielem,1,4) = g[4];
    grad_op(ielem,1,5) = g[5];
    grad_op(ielem,1,6) = g[6];
    grad_op(ielem,1,7) = g[7];

    // Direction 2 from ( y , x )
    grad( y , x , g );

    grad_op(ielem,2,0) = g[0];
    grad_op(ielem,2,1) = g[1];
    grad_op(ielem,2,2) = g[2];
    grad_op(ielem,2,3) = g[3];
    grad_op(ielem,2,4) = g[4];
    grad_op(ielem,2,5) = g[5];
    grad_op(ielem,2,6) = g[6];
    grad_op(ielem,2,7) = g[7];
  }

  //--------------------------------------------------------------------------

  // Initialization functor: gives every element the node coordinates of
  // the unit cube.
  struct Init {
    typedef typename self_type::execution_space execution_space ;

    elem_coord_type coords ;

    Init( const elem_coord_type & arg_coords )
      : coords( arg_coords ) {}

    KOKKOS_INLINE_FUNCTION
    void operator()( size_type ielem ) const
    {
      coords(ielem,0,0) = 0.;
      coords(ielem,1,0) = 0.;
      coords(ielem,2,0) = 0.;

      coords(ielem,0,1) = 1.;
      coords(ielem,1,1) = 0.;
      coords(ielem,2,1) = 0.;

      coords(ielem,0,2) = 1.;
      coords(ielem,1,2) = 1.;
      coords(ielem,2,2) = 0.;

      coords(ielem,0,3) = 0.;
      coords(ielem,1,3) = 1.;
      coords(ielem,2,3) = 0.;

      coords(ielem,0,4) = 0.;
      coords(ielem,1,4) = 0.;
      coords(ielem,2,4) = 1.;

      coords(ielem,0,5) = 1.;
      coords(ielem,1,5) = 0.;
      coords(ielem,2,5) = 1.;

      coords(ielem,0,6) = 1.;
      coords(ielem,1,6) = 1.;
      coords(ielem,2,6) = 1.;

      coords(ielem,0,7) = 0.;
      coords(ielem,1,7) = 1.;
      coords(ielem,2,7) = 1.;
    }
  };

  //--------------------------------------------------------------------------

  // Allocate 'count' elements, initialize them, run the gradient kernel
  // 'iter' times and return the smallest kernel time observed (0 when
  // iter is not positive).
  static double test( const int count , const int iter = 1 )
  {
    elem_coord_type coord( "coord" , count );
    elem_grad_type  grad ( "grad" , count );

    // Execute the parallel kernels on the arrays:

    double dt_min = 0 ;

    Kokkos::parallel_for( count , Init( coord ) );
    execution_space::fence();

    for ( int i = 0 ; i < iter ; ++i ) {
      Kokkos::Impl::Timer timer ;
      // Use self_type so the functor matches the scalar types of the views
      // allocated above; HexGrad<execution_space> would silently fall back
      // to the default scalar types.
      Kokkos::parallel_for( count , self_type( coord , grad ) );
      execution_space::fence();
      const double dt = timer.seconds();
      if ( 0 == i ) dt_min = dt ;
      else dt_min = dt < dt_min ? dt : dt_min ;
    }

    return dt_min ;
  }
};
}

View File

@ -1,104 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
// Select the host execution space used by this performance test.  The first
// enabled backend wins, checked in the order OpenMP, Threads, Serial; the
// build fails if none of them is enabled.  TestHostDeviceName is the matching
// printable name, passed to the run_test_* drivers to label their output.
#if defined( KOKKOS_HAVE_OPENMP )
typedef Kokkos::OpenMP TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::OpenMP" ;
#elif defined( KOKKOS_HAVE_PTHREAD )
typedef Kokkos::Threads TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Threads" ;
#elif defined( KOKKOS_HAVE_SERIAL )
typedef Kokkos::Serial TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Serial" ;
#else
# error "You must enable at least one of the following execution spaces in order to build this test: Kokkos::Threads, Kokkos::OpenMP, or Kokkos::Serial."
#endif
#include <impl/Kokkos_Timer.hpp>
#include <PerfTestHexGrad.hpp>
#include <PerfTestBlasKernels.hpp>
#include <PerfTestGramSchmidt.hpp>
#include <PerfTestDriver.hpp>
//------------------------------------------------------------------------
namespace Test {
// Test fixture that brings the host execution space up once before the
// first test of the case and shuts it down after the last one.
class host : public ::testing::Test {
protected:
  // Initialize the host device with four threads per available NUMA region.
  static void SetUpTestCase()
  {
    const unsigned threads_per_team = 4 ;
    const unsigned team_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned thread_total     = team_count * threads_per_team ;

    TestHostDevice::initialize( thread_total );
  }

  // Release the host device.
  static void TearDownTestCase()
  {
    TestHostDevice::finalize();
  }
};
// Time the HexGrad kernel on the host device for problem sizes 2^10 .. 2^19.
// The test only checks that the driver does not throw; timings are printed.
TEST_F( host, hexgrad ) {
EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName ));
}
// Time the Gram-Schmidt kernel on the host device for problem sizes
// 2^10 .. 2^19.  The test only checks that the driver does not throw;
// timings are printed.
TEST_F( host, gramschmidt ) {
EXPECT_NO_THROW(run_test_gramschmidt< TestHostDevice>( 10, 20, TestHostDeviceName ));
}
} // namespace Test

View File

@ -1,49 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
// Entry point of the test executable: hand the command line to Google Test
// and use the result of running every registered test as the exit status.
int main(int argc, char *argv[]) {
  ::testing::InitGoogleTest(&argc, argv);
  const int status = RUN_ALL_TESTS();
  return status;
}

View File

@ -1,504 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
// Execution space on which all parallel loops of this benchmark run.
typedef Kokkos::DefaultExecutionSpace exec_space;
// Text attribute codes passed as the 'attr' argument of textcolor().
#define RESET 0
#define BRIGHT 1
#define DIM 2
#define UNDERLINE 3
#define BLINK 4
#define REVERSE 7
#define HIDDEN 8
// Colour indices passed as the 'fg' / 'bg' arguments of textcolor(), which
// adds 30 (foreground) or 40 (background) before sending them to the terminal.
#define BLACK 0
#define RED 1
#define GREEN 2
#define YELLOW 3
#define BLUE 4
#define MAGENTA 5
#define CYAN 6
#define GREY 7
#define WHITE 8
// Write the terminal escape sequence ESC[<attr>;<fg+30>;<bg+40>m to stdout.
//
// attr : text attribute code (RESET, BRIGHT, ...).
// fg   : foreground colour index (BLACK .. WHITE).
// bg   : background colour index (BLACK .. WHITE).
//
// The sequence is printed directly instead of being formatted into a fixed
// 13-byte buffer first, which could overflow for out-of-range arguments.
void textcolor(int attr, int fg, int bg)
{
  printf("%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
}
// Send the "standard" colour sequence: attribute RESET, foreground BLACK,
// background WHITE.
// NOTE(review): WHITE is 8, so the background code sent is 48 rather than 47 — confirm this is intended.
void textcolor_standard() {textcolor(RESET, BLACK, WHITE);}
template<class T,class DEVICE_TYPE>
struct ZeroFunctor{
typedef DEVICE_TYPE execution_space;
typedef typename Kokkos::View<T,execution_space> type;
typedef typename Kokkos::View<T,execution_space>::HostMirror h_type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
data() = 0;
}
};
//---------------------------------------------------
//--------------atomic_fetch_add---------------------
//---------------------------------------------------
// Functor that atomically adds one to a shared rank-0 view on every call.
template<class T, class DEVICE_TYPE>
struct AddFunctor {
  typedef DEVICE_TYPE execution_space;
  typedef Kokkos::View<T, execution_space> type;

  type data;  // shared counter

  KOKKOS_INLINE_FUNCTION
  void operator()(int) const {
    Kokkos::atomic_fetch_add(&data(), static_cast<T>(1));
  }
};
template<class T>
T AddLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct AddFunctor<T,exec_space> f_add;
f_add.data = data;
Kokkos::parallel_for(loop,f_add);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
// Functor that adds one to a shared rank-0 view with a plain, unsynchronized
// read-modify-write.
template<class T, class DEVICE_TYPE>
struct AddNonAtomicFunctor {
  typedef DEVICE_TYPE execution_space;
  typedef Kokkos::View<T, execution_space> type;

  type data;  // shared counter

  KOKKOS_INLINE_FUNCTION
  void operator()(int) const {
    data() += static_cast<T>(1);
  }
};
template<class T>
T AddLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct AddNonAtomicFunctor<T,exec_space> f_add;
f_add.data = data;
Kokkos::parallel_for(loop,f_add);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
// Serial reference for the add test: increment a heap-allocated counter
// 'loop' times and return its final value (0 when loop <= 0).
//
// The counter is kept on the heap as in the original baseline; the storage
// is now released with delete[] to match the new[] that allocated it.
template<class T>
T AddLoopSerial(int loop) {
  T* data = new T[1];
  data[0] = 0;

  for (int i = 0; i < loop; i++)
    *data += (T)1;

  T val = *data;
  delete[] data;
  return val;
}
// Functor that increments a shared rank-0 view with a compare-and-swap
// retry loop.
template<class T, class DEVICE_TYPE>
struct CASFunctor {
  typedef DEVICE_TYPE execution_space;
  typedef Kokkos::View<T, execution_space> type;

  type data;  // shared counter

  KOKKOS_INLINE_FUNCTION
  void operator()(int) const {
    T observed = data();
    for (;;) {
      const T expected = observed;
      const T desired  = expected + static_cast<T>(1);
      observed = Kokkos::atomic_compare_exchange(&data(), expected, desired);
      // Done once the value returned by the exchange is the one we expected.
      if (!(observed != expected)) break;
    }
  }
};
template<class T>
T CASLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct CASFunctor<T,exec_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop,f_cas);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
// Functor that imitates the compare-and-swap increment with plain loads and
// stores: read, compute, re-check, then store without any atomic operation.
template<class T, class DEVICE_TYPE>
struct CASNonAtomicFunctor {
  typedef DEVICE_TYPE execution_space;
  typedef Kokkos::View<T, execution_space> type;

  type data;  // shared counter

  KOKKOS_INLINE_FUNCTION
  void operator()(int) const {
    volatile T assumed;
    volatile T newval;
    bool fail = true;
    while (true) {
      assumed = data();
      newval = assumed + static_cast<T>(1);
      // Store only if the value still matches what was read above.
      if (data() == assumed) {
        data() = newval;
        fail = false;
      }
      if (!fail) break;
    }
  }
};
template<class T>
T CASLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct CASNonAtomicFunctor<T,exec_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop,f_cas);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
T val = h_data();
return val;
}
// Serial reference for the compare-and-swap test: perform 'loop' increments
// on a heap-allocated counter, mirroring the read / compute / swap sequence
// of the parallel version, and return the final value (0 when loop <= 0).
//
// The counter is kept on the heap as in the original baseline; the storage
// is now released with delete[] to match the new[] that allocated it.
template<class T>
T CASLoopSerial(int loop) {
  T* data = new T[1];
  data[0] = 0;

  for (int i = 0; i < loop; i++) {
    T assumed;
    T newval;
    T old;
    do {
      assumed = *data;
      newval = assumed + (T)1;
      // Emulated exchange: read the current value, then store the new one.
      old = *data;
      *data = newval;
    }
    while (!(assumed == old));
  }

  T val = *data;
  delete[] data;
  return val;
}
// Functor that atomically swaps the work index into 'data' and atomically
// accumulates the value it displaced into 'data2'.
template<class T, class DEVICE_TYPE>
struct ExchFunctor {
  typedef DEVICE_TYPE execution_space;
  typedef Kokkos::View<T, execution_space> type;

  type data, data2;  // exchanged slot, sum of displaced values

  KOKKOS_INLINE_FUNCTION
  void operator()(int i) const {
    const T displaced = Kokkos::atomic_exchange(&data(), static_cast<T>(i));
    Kokkos::atomic_fetch_add(&data2(), displaced);
  }
};
template<class T>
T ExchLoop(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
typename ZeroFunctor<T,exec_space>::type data2("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
f_zero.data = data2;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct ExchFunctor<T,exec_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for(loop,f_exch);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
Kokkos::deep_copy(h_data2,data2);
T val = h_data() + h_data2();
return val;
}
// Functor that performs the exchange-and-accumulate step with plain loads
// and stores instead of atomic operations.
template<class T, class DEVICE_TYPE>
struct ExchNonAtomicFunctor {
  typedef DEVICE_TYPE execution_space;
  typedef Kokkos::View<T, execution_space> type;

  type data, data2;  // exchanged slot, sum of displaced values

  KOKKOS_INLINE_FUNCTION
  void operator()(int i) const {
    const T displaced = data();
    data() = static_cast<T>(i);
    data2() += displaced;
  }
};
template<class T>
T ExchLoopNonAtomic(int loop) {
struct ZeroFunctor<T,exec_space> f_zero;
typename ZeroFunctor<T,exec_space>::type data("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
typename ZeroFunctor<T,exec_space>::type data2("Data");
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
f_zero.data = data2;
Kokkos::parallel_for(1,f_zero);
exec_space::fence();
struct ExchNonAtomicFunctor<T,exec_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for(loop,f_exch);
exec_space::fence();
Kokkos::deep_copy(h_data,data);
Kokkos::deep_copy(h_data2,data2);
T val = h_data() + h_data2();
return val;
}
// Serial reference for the exchange test: for i = 0 .. loop-1 store i into
// one slot and add the value it displaced to a second slot, then return the
// sum of both slots.  That sum equals 0 + 1 + ... + (loop-1), and is 0 when
// loop <= 0.
//
// Both slots are kept on the heap as in the original baseline; the storage
// is now released with delete[] to match the new[] that allocated it.
template<class T>
T ExchLoopSerial(int loop) {
  T* data = new T[1];
  T* data2 = new T[1];
  data[0] = 0;
  data2[0] = 0;

  for (int i = 0; i < loop; i++) {
    T old = *data;
    *data = (T) i;
    *data2 += old;
  }

  T val = *data2 + *data;
  delete[] data;
  delete[] data2;
  return val;
}
// Run the atomic variant selected by 'test' (1 = add, 2 = compare-and-swap,
// 3 = exchange) and return its result; any other selector yields 0.
template<class T>
T LoopVariant(int loop, int test) {
  if (test == 1) return AddLoop<T>(loop);
  if (test == 2) return CASLoop<T>(loop);
  if (test == 3) return ExchLoop<T>(loop);
  return 0;
}
// Run the serial variant selected by 'test' (1 = add, 2 = compare-and-swap,
// 3 = exchange) and return its result; any other selector yields 0.
template<class T>
T LoopVariantSerial(int loop, int test) {
  if (test == 1) return AddLoopSerial<T>(loop);
  if (test == 2) return CASLoopSerial<T>(loop);
  if (test == 3) return ExchLoopSerial<T>(loop);
  return 0;
}
// Run the non-atomic variant selected by 'test' (1 = add,
// 2 = compare-and-swap, 3 = exchange) and return its result; any other
// selector yields 0.
template<class T>
T LoopVariantNonAtomic(int loop, int test) {
  if (test == 1) return AddLoopNonAtomic<T>(loop);
  if (test == 2) return CASLoopNonAtomic<T>(loop);
  if (test == 3) return ExchLoopNonAtomic<T>(loop);
  return 0;
}
// Run one test variant for type T in its atomic, non-atomic and serial form,
// time each run, and print a one-line report.
//
// loop      : number of operations performed by each variant.
// test      : variant selector forwarded to the LoopVariant* dispatchers.
// type_name : printable name of T used as the first column of the report.
//
// The run is reported as PASSED when the atomic result equals the serial
// result; the non-atomic result is printed but not checked.
template<class T>
void Loop(int loop, int test, const char* type_name) {
// Untimed warm-up run of the atomic variant.
LoopVariant<T>(loop,test);
Kokkos::Impl::Timer timer;
// time1: atomic variant.
T res = LoopVariant<T>(loop,test);
double time1 = timer.seconds();
timer.reset();
// time2: non-atomic variant.
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
double time2 = timer.seconds();
timer.reset();
// time3: serial variant.
T resSerial = LoopVariantSerial<T>(loop,test);
double time3 = timer.seconds();
// Scale each elapsed time by 1e6/loop, i.e. time per operation.
time1*=1e6/loop;
time2*=1e6/loop;
time3*=1e6/loop;
//textcolor_standard();
bool passed = true;
if(resSerial!=res) passed = false;
//if(!passed) textcolor(RESET,BLACK,YELLOW);
// NOTE(review): values are printed in (serial, atomic, non-atomic) order but
// the times in (atomic, non-atomic, serial) order, and the format string ends
// with an unmatched ')' — confirm before relying on the column layout.
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
//if(!passed) textcolor_standard();
printf("\n");
}
// Run the requested test variant for type T, or variants 1, 2 and 3 in turn
// when 'test' is -1.
template<class T>
void Test(int loop, int test, const char* type_name) {
  if (test != -1) {
    Loop<T>(loop, test, type_name);
    return;
  }

  for (int variant = 1; variant <= 3; ++variant)
    Loop<T>(loop, variant, type_name);
}
// Entry point of the atomic-operation benchmark.
//
// Command-line options (each takes an integer value):
//   --test N      run only test variant N (default -1: variants 1, 2 and 3)
//   --type N      run only data type N    (default -1: every type)
//   -l, --loop N  number of operations per run (default 1000000)
//
// An option given without its value is reported on stderr and the program
// exits with EXIT_FAILURE instead of reading past the end of argv.
int main(int argc, char* argv[])
{
  int type = -1;
  int loop = 1000000;
  int test = -1;

  for (int i = 0; i < argc; i++) {
    const char* const arg = argv[i];

    // Map the flag to the variable it sets; unknown arguments are ignored.
    int* target = NULL;
    if (strcmp(arg, "--test") == 0) target = &test;
    else if (strcmp(arg, "--type") == 0) target = &type;
    else if ((strcmp(arg, "-l") == 0) || (strcmp(arg, "--loop") == 0)) target = &loop;

    if (target == NULL) continue;

    if (i + 1 >= argc) {
      fprintf(stderr, "Missing value for option %s\n", arg);
      return EXIT_FAILURE;
    }
    *target = atoi(argv[++i]);
  }

  Kokkos::initialize(argc, argv);

  printf("Using %s\n", Kokkos::atomic_query_version());

  // With no explicit type, walk through every type id below 100; otherwise
  // run the requested type once.
  const bool all_tests = (type == -1);
  while (type < 100) {
    switch (type) {
      case 1:  Test<int>(loop,test,"int "); break;
      case 2:  Test<long int>(loop,test,"long int "); break;
      case 3:  Test<long long int>(loop,test,"long long int "); break;
      case 4:  Test<unsigned int>(loop,test,"unsigned int "); break;
      case 5:  Test<unsigned long int>(loop,test,"unsigned long int "); break;
      case 6:  Test<unsigned long long int>(loop,test,"unsigned long long int "); break;
      case 10:
        // The float test is disabled.
        //Test<float>(loop,test,"float ");
        break;
      case 11: Test<double>(loop,test,"double "); break;
      default: break;
    }
    if (!all_tests) type = 100;
    else type++;
  }

  Kokkos::finalize();
  return 0;
}

View File

@ -1,283 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( KOKKOS_HAVE_CUDA )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
//----------------------------------------------------------------------------
// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4).
// Via reinterpret_cast this can be used to support all scalar types of those sizes.
// Any other scalar type falls back to either normal reads out of global memory,
// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
// Read-only data handle that fetches ValueType elements through a CUDA
// texture object. AliasType is the type actually fetched by tex1Dfetch;
// its bits are reinterpreted as ValueType.
template< typename ValueType , typename AliasType >
struct CudaTextureFetch {

  ::cudaTextureObject_t   m_obj ;     // texture object used for the fetch
  const ValueType       * m_ptr ;     // plain pointer to the viewed memory
  int                     m_offset ;  // added to the index on every texture fetch

  //------------------------------------
  // Special members: all of them copy the three fields member-wise.

  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {}

  KOKKOS_INLINE_FUNCTION
  ~CudaTextureFetch() {}

  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch( const CudaTextureFetch & other )
    : m_obj( other.m_obj )
    , m_ptr( other.m_ptr )
    , m_offset( other.m_offset )
    {}

  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch( CudaTextureFetch && other )
    : m_obj( other.m_obj )
    , m_ptr( other.m_ptr )
    , m_offset( other.m_offset )
    {}

  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch & operator = ( const CudaTextureFetch & other )
    {
      m_offset = other.m_offset ;
      m_ptr    = other.m_ptr ;
      m_obj    = other.m_obj ;
      return *this ;
    }

  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch & operator = ( CudaTextureFetch && other )
    {
      m_offset = other.m_offset ;
      m_ptr    = other.m_ptr ;
      m_obj    = other.m_obj ;
      return *this ;
    }

  //------------------------------------
  // Texture object spans the entire allocation.
  // This handle may view a subset of the allocation, so an offset is required.
  //
  // Both the texture object and the offset are obtained from 'record'.
  // (Original note: 'attach_texture_object' returns 0 when __CUDA_ARCH__ < 300.)
  template< class CudaMemorySpace >
  inline explicit
  CudaTextureFetch( const ValueType * const arg_ptr
                  , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record
                  )
    : m_obj( record.template attach_texture_object< AliasType >() )
    , m_ptr( arg_ptr )
    , m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) )
    {}

  //------------------------------------
  // Element access: dereference pulls through the texture object and
  // returns by value. When not compiling device code for arch >= 300 the
  // element is read directly through m_ptr instead.
  template< typename iType >
  KOKKOS_INLINE_FUNCTION
  ValueType operator[]( const iType & i ) const
    {
#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
      AliasType fetched = tex1Dfetch<AliasType>( m_obj , i + m_offset );
      return *(reinterpret_cast<ValueType*> (&fetched));
#else
      return m_ptr[ i ];
#endif
    }

  // Implicit conversion to the pointer to referenced memory.
  KOKKOS_INLINE_FUNCTION
  operator const ValueType * () const { return m_ptr ; }
};
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
// Read-only data handle that loads ValueType elements with the __ldg
// intrinsic. AliasType is the type handed to __ldg; its bits are
// reinterpreted as ValueType.
template< typename ValueType , typename AliasType >
struct CudaLDGFetch {

  const ValueType * m_ptr ;  // pointer to the viewed memory

  // Read element i by value.
  // __ldg is only used when compiling device code; otherwise the element is
  // read directly through m_ptr, mirroring CudaTextureFetch's fallback.
  template< typename iType >
  KOKKOS_INLINE_FUNCTION
  ValueType operator[]( const iType & i ) const
    {
#if defined( __CUDA_ARCH__ )
      // m_ptr points to const data, so the alias pointer must be const too:
      // reinterpret_cast is not allowed to drop the qualifier.
      AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i]));
      return *(reinterpret_cast<ValueType*> (&v));
#else
      return m_ptr[ i ];
#endif
    }

  // Implicit conversion to the pointer to referenced memory.
  KOKKOS_INLINE_FUNCTION
  operator const ValueType * () const { return m_ptr ; }

  KOKKOS_INLINE_FUNCTION
  CudaLDGFetch() : m_ptr() {}

  KOKKOS_INLINE_FUNCTION
  ~CudaLDGFetch() {}

  KOKKOS_INLINE_FUNCTION
  CudaLDGFetch( const CudaLDGFetch & rhs )
    : m_ptr( rhs.m_ptr )
    {}

  KOKKOS_INLINE_FUNCTION
  CudaLDGFetch( CudaLDGFetch && rhs )
    : m_ptr( rhs.m_ptr )
    {}

  KOKKOS_INLINE_FUNCTION
  CudaLDGFetch & operator = ( const CudaLDGFetch & rhs )
    {
      m_ptr = rhs.m_ptr ;
      return *this ;
    }

  KOKKOS_INLINE_FUNCTION
  CudaLDGFetch & operator = ( CudaLDGFetch && rhs )
    {
      m_ptr = rhs.m_ptr ;
      return *this ;
    }

  // Same signature as the record-taking CudaTextureFetch constructor so the
  // two handle types are interchangeable; the allocation record is unused.
  // (Previously this was declared under the name CudaTextureFetch and
  // initialised m_ptr from an undeclared identifier.)
  template< class CudaMemorySpace >
  inline explicit
  CudaLDGFetch( const ValueType * const arg_ptr
              , Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const &
              )
    : m_ptr( arg_ptr )
    {}
};
#endif
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
* if 'const' value type, CudaSpace and random access.
*
* The specialization is enabled only when every clause of the enable_if
* predicate below holds. Element reads through the resulting handle return
* by value (see return_type), not by reference.
*/
template< class Traits >
class ViewDataHandle< Traits ,
typename std::enable_if<(
// Is Cuda memory space
( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )
&&
// Is a trivial const value of 4, 8, or 16 bytes
std::is_trivial<typename Traits::const_value_type>::value
&&
// value_type must already be the const-qualified type
std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value
&&
( sizeof(typename Traits::const_value_type) == 4 ||
sizeof(typename Traits::const_value_type) == 8 ||
sizeof(typename Traits::const_value_type) == 16 )
&&
// Random access trait
( Traits::memory_traits::RandomAccess != 0 )
)>::type >
{
public:
using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ;
using value_type = typename Traits::const_value_type ;
using return_type = typename Traits::const_value_type ; // NOT a reference
// Fetch type of matching size: int (4 bytes), int2 (8 bytes) or int4 (16 bytes).
// The 'void' fallback is not reachable given the size clause of the predicate above.
using alias_type = typename std::conditional< ( sizeof(value_type) == 4 ) , int ,
typename std::conditional< ( sizeof(value_type) == 8 ) , ::int2 ,
typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void
>::type
>::type
>::type ;
// Handle implementation is selected at compile time: __ldg based when
// KOKKOS_CUDA_USE_LDG_INTRINSIC is defined, texture-object based otherwise.
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
#else
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
#endif
// Handle-to-handle assignment: the existing handle is returned unchanged
// and the tracker is not consulted.
KOKKOS_INLINE_FUNCTION
static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ )
{
return arg_handle ;
}
// Raw-pointer-to-handle assignment: builds a new handle from the pointer
// and the allocation record obtained from the tracker. When not compiled
// for the host execution space this aborts and returns a default handle.
KOKKOS_INLINE_FUNCTION
static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker )
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
// Assignment of texture = non-texture requires creation of a texture object
// which can only occur on the host. In addition, 'get_record' is only valid
// if called in a host execution space
return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() );
#else
Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel");
return handle_type();
#endif
}
};
}
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP */

View File

@ -1,277 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDAEXEC_HPP
#define KOKKOS_CUDAEXEC_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <string>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
#include <Cuda/Kokkos_Cuda_Error.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Compile-time constants describing the CUDA limits used by the launch code,
// plus two helpers that round a count up to whole warps.
struct CudaTraits {
enum { WarpSize = 32 /* 0x0020 */ };
enum { WarpIndexMask = 0x001f /* Mask for warpindex */ };
enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpIndexShift */ };
enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ };
enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ };
enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ };
enum { UpperBoundGridCount = 65535 /* Hard upper bound */ };
enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ };
enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ };
enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ };
// Array type of ConstantMemoryUsage bytes, expressed in unsigned long words.
typedef unsigned long
ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ];
// Compared against sizeof(DriverType) to select the CudaParallelLaunch specialization.
enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ };
// Number of warps needed to hold i threads: i / WarpSize, rounded up.
KOKKOS_INLINE_FUNCTION static
CudaSpace::size_type warp_count( CudaSpace::size_type i )
{ return ( i + WarpIndexMask ) >> WarpIndexShift ; }
// i rounded up to the next multiple of WarpSize.
KOKKOS_INLINE_FUNCTION static
CudaSpace::size_type warp_align( CudaSpace::size_type i )
{
// Mask clears the low WarpIndexShift bits.
enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) };
return ( i + WarpIndexMask ) & Mask ;
}
};
//----------------------------------------------------------------------------
// Declarations only; the definitions are not part of this header.
// NOTE(review): the names suggest queries of device limits and accessors for
// scratch buffers of at least 'size' -- confirm units and ownership against
// the definitions.
CudaSpace::size_type cuda_internal_maximum_warp_count();
CudaSpace::size_type cuda_internal_maximum_grid_count();
CudaSpace::size_type cuda_internal_maximum_shared_words();
CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size );
CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size );
CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size );
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( __CUDACC__ )
/** \brief Access to constant memory on the device */
// Constant-memory buffer that CudaParallelLaunch fills with the functor
// (via cudaMemcpyToSymbol) before launching the constant-memory kernel.
// With KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE it is only declared here.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ __constant__
Kokkos::Impl::CudaTraits::ConstantGlobalBufferType
kokkos_impl_cuda_constant_memory_buffer ;
// Device-side pointer to the lock array indexed by lock_address_cuda_space
// and unlock_address_cuda_space; set from the host before each launch.
// NOTE(review): unlike the buffer above, this symbol is not marked 'extern'
// under relocatable device code -- confirm that is intended.
__device__ __constant__
int* kokkos_impl_cuda_atomic_lock_array ;
// Mask applied to (address >> 2) to select a lock slot (keeps the low 17 bits).
#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
// Only referenced from commented-out code in the lock helpers below.
#define CUDA_SPACE_ATOMIC_XOR_MASK 0x15A39
namespace Kokkos {
namespace Impl {
// Try to take the lock slot associated with address 'ptr'.
// The slot index is the address shifted right by two bits and masked with
// CUDA_SPACE_ATOMIC_MASK (the XOR step with CUDA_SPACE_ATOMIC_XOR_MASK is
// not applied). Returns true when the slot changed from 0 to 1, i.e. the
// caller now holds the lock; false when it was already taken.
__device__ inline
bool lock_address_cuda_space(void* ptr) {
  const size_t slot = ( size_t(ptr) >> 2 ) & CUDA_SPACE_ATOMIC_MASK ;
  const int previous = atomicCAS( &kokkos_impl_cuda_atomic_lock_array[slot] , 0 , 1 );
  return previous == 0 ;
}
// Release the lock slot associated with address 'ptr' by atomically storing
// 0 into it. The slot index is computed exactly as in
// lock_address_cuda_space: address shifted right by two bits, masked with
// CUDA_SPACE_ATOMIC_MASK (no XOR step).
__device__ inline
void unlock_address_cuda_space(void* ptr) {
  const size_t slot = ( size_t(ptr) >> 2 ) & CUDA_SPACE_ATOMIC_MASK ;
  atomicExch( &kokkos_impl_cuda_atomic_lock_array[ slot ] , 0 );
}
}
}
// Returns the kernel's dynamically sized shared memory array, viewed as an
// array of T.
template< typename T >
inline
__device__
T * kokkos_impl_cuda_shared_memory()
{
  extern __shared__ Kokkos::CudaSpace::size_type sh[];
  return reinterpret_cast< T * >( sh );
}
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// See section B.17 of Cuda C Programming Guide Version 3.2
// for discussion of
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
// function qualifier which could be used to improve performance.
//----------------------------------------------------------------------------
// Maximize L1 cache and minimize shared memory:
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 );
// For 2.0 capability: 48 KB L1 and 16 KB shared
//----------------------------------------------------------------------------
// Kernel entry point for functors passed through constant memory: treats the
// start of kokkos_impl_cuda_constant_memory_buffer as a DriverType object
// and invokes it.
template< class DriverType >
__global__
static void cuda_parallel_launch_constant_memory()
{
  const DriverType * const functor =
    reinterpret_cast< const DriverType * >( kokkos_impl_cuda_constant_memory_buffer );

  (*functor)();
}
// Kernel entry point for functors passed by value as the kernel argument:
// simply invokes the received copy.
template< class DriverType >
__global__
static void cuda_parallel_launch_local_memory( const DriverType driver )
{
driver();
}
// Primary template (declaration only). 'Large' defaults to true when
// sizeof(DriverType) exceeds CudaTraits::ConstantMemoryUseThreshold; the
// <DriverType,true> specialization passes the functor through constant
// memory, the <DriverType,false> one passes it as a kernel argument.
template < class DriverType ,
bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) >
struct CudaParallelLaunch ;
// Launch helper for "large" functors: the functor is copied into the
// constant-memory buffer and the kernel reads it from there.
template < class DriverType >
struct CudaParallelLaunch< DriverType , true > {

  // Launches 'driver' on the device as a side effect of construction.
  //   driver - functor to run; must fit in ConstantGlobalBufferType
  //   grid   - grid dimensions; nothing is launched when grid.x == 0
  //   block  - block dimensions; nothing is launched when their product is 0
  //   shmem  - dynamic shared memory request in bytes
  //   stream - stream to launch on (default stream when omitted)
  // Throws (via throw_runtime_exception) when the functor or the shared
  // memory request is too large; CUDA_SAFE_CALL reports a failed upload of
  // the functor or of the lock-array pointer.
  inline
  CudaParallelLaunch( const DriverType & driver
                    , const dim3       & grid
                    , const dim3       & block
                    , const int          shmem
                    , const cudaStream_t stream = 0 )
  {
    if ( grid.x && ( block.x * block.y * block.z ) ) {

      if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
           sizeof( DriverType ) ) {
        Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") );
      }

      if ( CudaTraits::SharedMemoryCapacity < shmem ) {
        Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
      }
      else if ( shmem ) {
        // Cache configuration is only a preference, so its result is left unchecked.
        cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared );
      } else {
        cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 );
      }

      // Copy functor to constant memory on the device.
      // The kernel reads the functor from this buffer, so a failed copy must
      // not be ignored.
      CUDA_SAFE_CALL( cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) ) );

      // Publish the lock-array pointer used by the address-lock helpers.
      int* lock_array_ptr = lock_array_cuda_space_ptr();
      CUDA_SAFE_CALL( cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) ) );

      // Invoke the driver function on the device
      cuda_parallel_launch_constant_memory< DriverType ><<< grid , block , shmem , stream >>>();

#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
      Kokkos::Cuda::fence();
      CUDA_SAFE_CALL( cudaGetLastError() );
#endif
    }
  }
};
// Launch helper for "small" functors: the functor is passed by value as the
// kernel argument.
template < class DriverType >
struct CudaParallelLaunch< DriverType , false > {

  // Launches 'driver' on the device as a side effect of construction.
  //   driver - functor to run
  //   grid   - grid dimensions; nothing is launched when grid.x == 0
  //   block  - block dimensions; nothing is launched when their product is 0
  //   shmem  - dynamic shared memory request in bytes
  //   stream - stream to launch on (default stream when omitted)
  // Throws (via throw_runtime_exception) when the shared memory request is
  // too large; CUDA_SAFE_CALL reports a failed upload of the lock-array
  // pointer.
  inline
  CudaParallelLaunch( const DriverType & driver
                    , const dim3       & grid
                    , const dim3       & block
                    , const int          shmem
                    , const cudaStream_t stream = 0 )
  {
    if ( grid.x && ( block.x * block.y * block.z ) ) {

      if ( CudaTraits::SharedMemoryCapacity < shmem ) {
        Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
      }
      else if ( shmem ) {
        // Cache configuration is only a preference, so its result is left unchecked.
        cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferShared );
      } else {
        cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferL1 );
      }

      // Publish the lock-array pointer used by the address-lock helpers;
      // a failed copy would leave the device-side pointer stale.
      int* lock_array_ptr = lock_array_cuda_space_ptr();
      CUDA_SAFE_CALL( cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) ) );

      // Invoke the driver function on the device
      cuda_parallel_launch_local_memory< DriverType ><<< grid , block , shmem , stream >>>( driver );

#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
      Kokkos::Cuda::fence();
      CUDA_SAFE_CALL( cudaGetLastError() );
#endif
    }
  }
};
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* defined( __CUDACC__ ) */
#endif /* defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */

View File

@ -1,670 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Kokkos_Cuda.hpp>
#include <Kokkos_CudaSpace.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
// DeepCopy< Destination , Source > constructors. Each one copies 'n' bytes
// from 'src' to 'dst' as a side effect of construction, always passing
// cudaMemcpyDefault as the copy kind. The three-argument form uses
// cudaMemcpy; the form taking a Cuda instance uses cudaMemcpyAsync on that
// instance's stream. Errors are reported through CUDA_SAFE_CALL.

// CudaSpace <- CudaSpace
DeepCopy<CudaSpace,CudaSpace>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<CudaSpace,CudaSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
// HostSpace <- CudaSpace
DeepCopy<HostSpace,CudaSpace>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<HostSpace,CudaSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
// CudaSpace <- HostSpace
DeepCopy<CudaSpace,HostSpace>::DeepCopy( void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) ); }
DeepCopy<CudaSpace,HostSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{ CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) ); }
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace {
// Ensures the tracked allocation carries a TextureAttribute.
//   tracker   - allocation to attach the texture attribute to
//   type_size - element size in bytes, used to derive the element count
//   desc      - channel format forwarded to the new TextureAttribute
// When the allocation has no attribute yet, one is created provided the
// allocator supports texture binding and the element count is below
// TEXTURE_BOUND_1D; otherwise a runtime exception describing the reason is
// raised. An exception is also raised when the attribute present afterwards
// is not a TextureAttribute.
void texture_object_attach_impl(  Impl::AllocationTracker const & tracker
                                , unsigned type_size
                                , ::cudaChannelFormatDesc const & desc
                               )
{
  enum { TEXTURE_BOUND_1D = 2u << 27 };

  const bool needs_attribute = ( NULL == tracker.attribute() );

  if ( needs_attribute ) {
    // check for correct allocator and an element count within the 1D bound
    const bool ok_alloc = tracker.allocator()->support_texture_binding();
    const bool ok_count = (tracker.alloc_size() / type_size) < TEXTURE_BOUND_1D;

    if ( !( ok_alloc && ok_count ) ) {
      // Report every reason that applies in a single message.
      std::ostringstream msg;
      msg << "Error: Cannot attach texture object";
      if (!ok_alloc) {
        msg << ", incompatabile allocator " << tracker.allocator()->name();
      }
      if (!ok_count) {
        msg << ", array " << tracker.label() << " too large";
      }
      msg << ".";
      Kokkos::Impl::throw_runtime_exception( msg.str() );
    }
    else {
      Impl::TextureAttribute * tex_attr =
        new Impl::TextureAttribute( tracker.alloc_ptr(), tracker.alloc_size(), desc );
      tracker.set_attribute( tex_attr );
    }
  }

  // Whatever attribute is attached now must be a texture attribute.
  Impl::TextureAttribute * const as_texture =
    dynamic_cast<Impl::TextureAttribute *>(tracker.attribute());

  if ( NULL == as_texture ) {
    std::ostringstream msg;
    msg << "Error: Allocation " << tracker.label() << " already has an attribute attached.";
    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }
}
} // unnamed namespace
/*--------------------------------------------------------------------------*/
// Creates an AllocationTracker of 'size' bytes labelled 'label', using the
// allocator returned by allocator().
Impl::AllocationTracker CudaSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
// Attaches a texture attribute to the tracked allocation; forwards all
// arguments unchanged to the file-local texture_object_attach_impl.
void CudaSpace::texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
texture_object_attach_impl( tracker, type_size, desc );
}
// Raises a runtime exception reporting an attempt to execute a Cuda
// function from a non-Cuda space.
void CudaSpace::access_error()
{
  Kokkos::Impl::throw_runtime_exception(
    std::string("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" ) );
}
// Pointer-taking overload: the pointer is ignored and the same runtime
// exception as the argument-free overload is raised.
void CudaSpace::access_error( const void * const )
{
  Kokkos::Impl::throw_runtime_exception(
    std::string("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" ) );
}
/*--------------------------------------------------------------------------*/
// Creates an AllocationTracker of 'size' bytes labelled 'label', using the
// allocator returned by allocator().
Impl::AllocationTracker CudaUVMSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
// Attaches a texture attribute to the tracked allocation; forwards all
// arguments unchanged to the file-local texture_object_attach_impl.
void CudaUVMSpace::texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
)
{
texture_object_attach_impl( tracker, type_size, desc );
}
// Compile-time availability of UVM: true when CUDA_VERSION is defined and
// at least 6000 and the target is not __APPLE__, false otherwise.
bool CudaUVMSpace::available()
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && !defined(__APPLE__)
  return true ;
#else
  return false ;
#endif
}
/*--------------------------------------------------------------------------*/
// Creates an AllocationTracker of 'size' bytes labelled 'label', using the
// allocator returned by allocator().
Impl::AllocationTracker CudaHostPinnedSpace::allocate_and_track( const std::string & label, const size_t size )
{
return Impl::AllocationTracker( allocator(), size, label);
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
// Default constructors of the three Cuda memory spaces.
// CudaSpace and CudaUVMSpace record the device reported by a
// default-constructed Kokkos::Cuda instance; CudaHostPinnedSpace stores
// nothing.
CudaSpace::CudaSpace()
: m_device( Kokkos::Cuda().cuda_device() )
{
}
CudaUVMSpace::CudaUVMSpace()
: m_device( Kokkos::Cuda().cuda_device() )
{
}
CudaHostPinnedSpace::CudaHostPinnedSpace()
{
}
// Raw allocation of 'arg_alloc_size' bytes in each memory space. The result
// pointer starts as NULL and is filled in by the CUDA call; failures are
// reported through CUDA_SAFE_CALL.

// Allocation via cudaMalloc.
void * CudaSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMalloc( &ptr, arg_alloc_size ) );
return ptr ;
}
// Allocation via cudaMallocManaged with the cudaMemAttachGlobal flag.
void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );
return ptr ;
}
// Allocation via cudaHostAlloc with the cudaHostAllocDefault flag.
void * CudaHostPinnedSpace::allocate( const size_t arg_alloc_size ) const
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaHostAlloc( &ptr, arg_alloc_size , cudaHostAllocDefault ) );
return ptr ;
}
// Raw deallocation in each memory space. The size argument is unused.
// Anything thrown while freeing is caught and discarded, so these functions
// do not propagate exceptions (they are called from the record destructors).

// Release via cudaFree.
void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
} catch(...) {}
}
// Release via cudaFree.
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
} catch(...) {}
}
// Release via cudaFreeHost.
void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
try {
CUDA_SAFE_CALL( cudaFreeHost( arg_alloc_ptr ) );
} catch(...) {}
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Definitions of the static root record of each Cuda memory space. The
// record constructors below pass the address of their space's root to the
// base class, and get_record searches starting from it.
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record ;
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record ;
SharedAllocationRecord< void , void >
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record ;
// Creates a linear-memory texture object covering an allocation.
//   sizeof_alias - size in bytes of the fetch type: 4 selects an 'int'
//                  channel, 8 selects '::int2', any other value '::int4'
//   alloc_ptr    - start of the device memory the texture refers to
//   alloc_size   - size of that memory in bytes
// Returns the created texture object; creation errors are reported through
// CUDA_SAFE_CALL.
// (Original note: only valid for 300 <= __CUDA_ARCH__, otherwise return zero.)
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
                     , void * const   alloc_ptr
                     , size_t const   alloc_size )
{
  // Both descriptors start zeroed; only the resource side is filled in.
  struct cudaResourceDesc resDesc ;
  struct cudaTextureDesc  texDesc ;

  memset( & resDesc , 0 , sizeof(resDesc) );
  memset( & texDesc , 0 , sizeof(texDesc) );

  resDesc.resType = cudaResourceTypeLinear ;

  // Channel format follows the size of the alias type.
  if ( sizeof_alias == 4 ) {
    resDesc.res.linear.desc = cudaCreateChannelDesc< int >();
  }
  else if ( sizeof_alias == 8 ) {
    resDesc.res.linear.desc = cudaCreateChannelDesc< ::int2 >();
  }
  else {
    /* sizeof_alias == 16 */
    resDesc.res.linear.desc = cudaCreateChannelDesc< ::int4 >();
  }

  resDesc.res.linear.devPtr      = alloc_ptr ;
  resDesc.res.linear.sizeInBytes = alloc_size ;

  ::cudaTextureObject_t tex_obj ;

  CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );

  return tex_obj ;
}
// Label of the allocation, read from the SharedAllocationHeader.

// CudaSpace: the header is first copied into a local header object with
// DeepCopy< HostSpace , CudaSpace > and the label is read from that copy.
std::string
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_label() const
{
SharedAllocationHeader header ;
Kokkos::Impl::DeepCopy< Kokkos::HostSpace , Kokkos::CudaSpace >( & header , RecordBase::head() , sizeof(SharedAllocationHeader) );
return std::string( header.m_label );
}
// CudaUVMSpace: the label is read directly through the header pointer.
std::string
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_label() const
{
return std::string( RecordBase::head()->m_label );
}
// CudaHostPinnedSpace: the label is read directly through the header pointer.
std::string
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_label() const
{
return std::string( RecordBase::head()->m_label );
}
// Factory functions: each returns a heap-allocated record for its memory
// space, constructed from the space, label and size. The matching
// deallocate() overloads delete such records.
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::
allocate( const Kokkos::CudaSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
allocate( const Kokkos::CudaUVMSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
allocate( const Kokkos::CudaHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
)
{
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
}
// Record deleters: each casts the base-class pointer back to the record
// type of its memory space and deletes it, which runs that record's
// destructor.
void
SharedAllocationRecord< Kokkos::CudaSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
void
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
void
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
deallocate( SharedAllocationRecord< void , void > * arg_rec )
{
delete static_cast<SharedAllocationRecord*>(arg_rec);
}
// Destructors: each hands the allocation pointer and size stored in the
// base record back to the memory space the record was created with.
SharedAllocationRecord< Kokkos::CudaSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
~SharedAllocationRecord()
{
m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr
, SharedAllocationRecord< void , void >::m_alloc_size
);
}
// Constructs a record for a new CudaSpace allocation.
//   arg_space      - memory space used to allocate (and later free) the memory
//   arg_label      - label stored in the allocation header; truncated to fit
//   arg_alloc_size - number of user bytes; the header is allocated in front
//   arg_dealloc    - deallocation function passed through to the base record
// The header is assembled on the host and then copied to the device.
SharedAllocationRecord< Kokkos::CudaSpace , void >::
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
                      , const std::string       & arg_label
                      , const size_t              arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_tex_obj( 0 )
  , m_space( arg_space )
{
  SharedAllocationHeader header ;

  // Fill in the Header information
  header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( header.m_label
          , arg_label.c_str()
          , SharedAllocationHeader::maximum_label_length
          );

  // strncpy writes no terminator when the label fills the whole buffer;
  // force one so get_label() never reads past the label storage.
  // (Relies on m_label holding maximum_label_length characters, as the
  // strncpy call above already does.)
  header.m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = '\0' ;

  // Copy to device memory (the copy happens in the DeepCopy temporary's
  // constructor, the same form get_label() uses).
  Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
}
// Constructs a record for a new CudaUVMSpace allocation.
//   arg_space      - memory space used to allocate (and later free) the memory
//   arg_label      - label stored in the allocation header; truncated to fit
//   arg_alloc_size - number of user bytes; the header is allocated in front
//   arg_dealloc    - deallocation function passed through to the base record
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
                      , const std::string          & arg_label
                      , const size_t                 arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_tex_obj( 0 )
  , m_space( arg_space )
{
  // Fill in the Header information, directly accessible via UVM
  RecordBase::m_alloc_ptr->m_record = this ;

  strncpy( RecordBase::m_alloc_ptr->m_label
          , arg_label.c_str()
          , SharedAllocationHeader::maximum_label_length
          );

  // strncpy writes no terminator when the label fills the whole buffer;
  // force one so get_label() never reads past the label storage.
  // (Relies on m_label holding maximum_label_length characters, as the
  // strncpy call above already does.)
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = '\0' ;
}
// Constructs a record for a new CudaHostPinnedSpace allocation.
//   arg_space      - memory space used to allocate (and later free) the memory
//   arg_label      - label stored in the allocation header; truncated to fit
//   arg_alloc_size - number of user bytes; the header is allocated in front
//   arg_dealloc    - deallocation function passed through to the base record
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
                      , const std::string                 & arg_label
                      , const size_t                        arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information; the header is written directly through
  // the pointer returned by the pinned-host allocation.
  RecordBase::m_alloc_ptr->m_record = this ;

  strncpy( RecordBase::m_alloc_ptr->m_label
          , arg_label.c_str()
          , SharedAllocationHeader::maximum_label_length
          );

  // strncpy writes no terminator when the label fills the whole buffer;
  // force one so get_label() never reads past the label storage.
  // (Relies on m_label holding maximum_label_length characters, as the
  // strncpy call above already does.)
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = '\0' ;
}
/// Look up the tracking record that owns a CudaSpace allocation.
///
/// \param alloc_ptr  pointer handed to RecordBase::find() together with
///                   this space's root record
/// \return the matching record, never null
/// Reports an error through throw_runtime_exception when no record matches.
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr )
{
  using Header     = SharedAllocationHeader ;
  using RecordBase = SharedAllocationRecord< void , void > ;
  using RecordCuda = SharedAllocationRecord< Kokkos::CudaSpace , void > ;

#if 0
  // Disabled alternative: copy the header back from the allocation
  // and trust the record pointer stored inside it.
  SharedAllocationHeader head ;
  SharedAllocationHeader const * const head_cuda = Header::get_header( alloc_ptr );
  Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) );
  RecordCuda * const found = static_cast< RecordCuda * >( head.m_record );
  if ( found->m_alloc_ptr != head_cuda ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
  }
#else
  // Active path: search the list of records hanging off s_root_record.
  RecordCuda * const found =
    static_cast< RecordCuda * >( RecordBase::find( & s_root_record , alloc_ptr ) );
  if ( ! found ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
  }
#endif

  return found ;
}
/// Recover the tracking record of a CudaUVMSpace allocation from the
/// header stored immediately in front of the user memory.
///
/// \param alloc_ptr  user pointer; the header is read at alloc_ptr - sizeof(header)
/// \return the record referenced by that header
/// Reports an error through throw_runtime_exception when the record does
/// not point back at the same header.
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_ptr )
{
  using Header     = SharedAllocationHeader ;
  using RecordCuda = SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;

  // Step back one header from the user memory.
  Header * const header = static_cast< Header * >( alloc_ptr ) - 1 ;

  // Header and record must reference each other.
  const bool consistent = ( header->m_record->m_alloc_ptr == header );
  if ( ! consistent ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) );
  }

  return static_cast< RecordCuda * >( header->m_record );
}
/// Recover the tracking record of a CudaHostPinnedSpace allocation from the
/// header stored immediately in front of the user memory.
///
/// \param alloc_ptr  user pointer; the header is read at alloc_ptr - sizeof(header)
/// \return the record referenced by that header
/// Reports an error through throw_runtime_exception when the record does
/// not point back at the same header.
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void * alloc_ptr )
{
  using Header     = SharedAllocationHeader ;
  using RecordCuda = SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > ;

  // Step back one header from the user memory.
  Header * const header = static_cast< Header * >( alloc_ptr ) - 1 ;

  // Header and record must reference each other.
  const bool consistent = ( header->m_record->m_alloc_ptr == header );
  if ( ! consistent ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
  }

  return static_cast< RecordCuda * >( header->m_record );
}
// Iterate records to print orphaned memory ...
//
// Walks the circular list that starts and ends at s_root_record and
// writes one formatted line per record to the stream 's'.
//
//   s      : destination stream for the report
//   space  : unused; selects this overload
//   detail : true  -> record address, list links, extent, count,
//                     deallocation function and label
//            false -> data pointer, size and label only
//
// Each header is copied to a host-side SharedAllocationHeader with
// DeepCopy<HostSpace,CudaSpace> before its label is read.
void
SharedAllocationRecord< Kokkos::CudaSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail )
{
  SharedAllocationRecord< void , void > * r = & s_root_record ;
  char buffer[256] ;
  SharedAllocationHeader head ;

  if ( detail ) {
    do {
      if ( r->m_alloc_ptr ) {
        Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
      }
      else {
        // Records without an allocation (e.g. the root) print an empty label.
        head.m_label[0] = 0 ;
      }
      snprintf( buffer , 256 , "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
              , reinterpret_cast<unsigned long>( r )
              , reinterpret_cast<unsigned long>( r->m_prev )
              , reinterpret_cast<unsigned long>( r->m_next )
              , reinterpret_cast<unsigned long>( r->m_alloc_ptr )
              , r->m_alloc_size
              , r->m_count
              , reinterpret_cast<unsigned long>( r->m_dealloc )
              , head.m_label
              );
      // Honor the caller-supplied stream instead of hard-wiring std::cout.
      s << buffer ;
      r = r->m_next ;
    } while ( r != & s_root_record );
  }
  else {
    do {
      if ( r->m_alloc_ptr ) {
        Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
        snprintf( buffer , 256 , "Cuda [ 0x%.12lx + %ld ] %s\n"
                , reinterpret_cast< unsigned long >( r->data() )
                , r->size()
                , head.m_label
                );
      }
      else {
        snprintf( buffer , 256 , "Cuda [ 0 + 0 ]\n" );
      }
      // Honor the caller-supplied stream instead of hard-wiring std::cout.
      s << buffer ;
      r = r->m_next ;
    } while ( r != & s_root_record );
  }
}
/// Print the CudaUVMSpace allocation records to 's' by delegating to the
/// base-class helper print_host_accessible_records with the "CudaUVM" tag.
/// The 'space' argument is not used.
void
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaUVMSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void > base_record_type ;

  base_record_type::print_host_accessible_records( s , "CudaUVM" , & s_root_record , detail );
}
/// Print the CudaHostPinnedSpace allocation records to 's' by delegating to
/// the base-class helper print_host_accessible_records with the
/// "CudaHostPinned" tag. The 'space' argument is not used.
void
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bool detail )
{
  typedef SharedAllocationRecord< void , void > base_record_type ;

  base_record_type::print_host_accessible_records( s , "CudaHostPinned" , & s_root_record , detail );
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace {
// Kernel: each thread zeroes the lock-array entry matching its global
// thread index; threads past CUDA_SPACE_ATOMIC_MASK do nothing.
__global__ void init_lock_array_kernel() {
  const unsigned entry = blockIdx.x*blockDim.x + threadIdx.x;
  if ( entry >= CUDA_SPACE_ATOMIC_MASK+1 ) {
    return ;
  }
  kokkos_impl_cuda_atomic_lock_array[entry] = 0;
}
}
namespace Impl {
// Access (and lazily create) the lock array held in a function-local static.
//
//   deallocate == true  : cudaFree the array, reset the pointer, return NULL
//   deallocate == false : cudaMalloc CUDA_SPACE_ATOMIC_MASK+1 ints on first
//                         use, then return the stored pointer
//
// Return codes of cudaFree / cudaMalloc are not inspected.
int* lock_array_cuda_space_ptr(bool deallocate) {
  static int* lock_array = NULL;

  if ( deallocate ) {
    cudaFree(lock_array);
    lock_array = NULL;
    return lock_array;
  }

  if ( lock_array == NULL ) {
    cudaMalloc(&lock_array,sizeof(int)*(CUDA_SPACE_ATOMIC_MASK+1));
  }
  return lock_array;
}
// Publish the lock array to device code and zero it:
//  1. obtain the array pointer from lock_array_cuda_space_ptr(),
//  2. copy that pointer into the symbol kokkos_impl_cuda_atomic_lock_array,
//  3. launch init_lock_array_kernel to clear the entries.
//
// NOTE(review): 'is_initialized' is a plain (non-static) local that is always
// 0 here, so the guard below is always taken and every call repeats the full
// initialization. Confirm whether a once-only guard was intended before
// changing it; lock_array_cuda_space_ptr(true) can reset the pointer, in
// which case repeating the copy is required.
void init_lock_array_cuda_space() {
int is_initialized = 0;
if(! is_initialized) {
int* lock_array_ptr = lock_array_cuda_space_ptr();
// Copies the pointer value itself (sizeof(int*)), not the array contents.
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
// 256 threads per block; the kernel bounds-checks each thread index.
init_lock_array_kernel<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>();
}
}
}
}
#endif // KOKKOS_HAVE_CUDA

View File

@ -1,183 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
#define KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
namespace Kokkos {
namespace Impl {
/// Attach a copy of a destruct functor to the record of an existing allocation.
///
/// The functor is copy-constructed with placement new in the bytes that
/// directly follow the SharedAllocationRecord object, and the record's
/// m_destruct_functor is set to shared_allocation_destroy<DestructFunctor>.
///
/// \param arg_space      unused; selects this overload
/// \param arg_alloc_ptr  pointer passed to SharedAllocationRecord::get_record
/// \param arg_destruct   functor to copy behind the record
/// \return the record found for arg_alloc_ptr
///
/// NOTE(review): the preconditions listed as "assert:" comments below are
/// not checked by this function.
template< class DestructFunctor >
SharedAllocationRecord *
shared_allocation_record( Kokkos::CudaSpace const & arg_space
                        , void * const arg_alloc_ptr
                        , DestructFunctor const & arg_destruct )
{
  SharedAllocationRecord * const record = SharedAllocationRecord::get_record( arg_alloc_ptr );

  // assert: record != 0
  // assert: sizeof(DestructFunctor) <= record->m_destruct_size
  // assert: record->m_destruct_function == 0

  // Byte-wise pointer arithmetic; avoids a round trip through
  // 'unsigned long', which is narrower than a pointer on LLP64 targets.
  DestructFunctor * const functor =
    reinterpret_cast< DestructFunctor * >(
      reinterpret_cast< char * >( record ) + sizeof(SharedAllocationRecord) );

  new( functor ) DestructFunctor( arg_destruct );

  record->m_destruct_functor = & shared_allocation_destroy< DestructFunctor > ;

  return record ;
}
/// class CudaUnmanagedAllocator
/// Allocator tag whose deallocate(ptr,size) intentionally does nothing.
struct CudaUnmanagedAllocator
{
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Unmanaged Allocator"; }

  /// No-op: both arguments are ignored.
  static void deallocate( void * , size_t ) {}

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedUVMAllocator
/// Allocator tag whose deallocate(ptr,size) intentionally does nothing.
struct CudaUnmanagedUVMAllocator
{
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Unmanaged UVM Allocator"; }

  /// No-op: both arguments are ignored.
  static void deallocate( void * , size_t ) {}

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedHostAllocator
/// Allocator tag whose deallocate(ptr,size) intentionally does nothing.
class CudaUnmanagedHostAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Unmanaged Host Allocator"; }

  /// Unmanaged deallocate does nothing; both arguments are ignored.
  static void deallocate( void * , size_t ) {}
};
/// class CudaMallocAllocator
/// Static allocator interface; allocate / deallocate / reallocate are
/// only declared here and defined out of line.
class CudaMallocAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Malloc Allocator"; }

  /// Allocate 'size' bytes.
  static void * allocate( size_t size );

  /// Release 'ptr'; the size argument is unnamed in the declaration.
  static void deallocate( void * ptr , size_t );

  /// Obtain a block of 'new_size' bytes for data currently held in
  /// 'old_ptr' / 'old_size'.
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaUVMAllocator
/// Static allocator interface; allocate / deallocate / reallocate are
/// only declared here and defined out of line.
class CudaUVMAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda UVM Allocator"; }

  /// Allocate 'size' bytes.
  static void * allocate( size_t size );

  /// Release 'ptr'; the size argument is unnamed in the declaration.
  static void deallocate( void * ptr , size_t );

  /// Obtain a block of 'new_size' bytes for data currently held in
  /// 'old_ptr' / 'old_size'.
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaHostAllocator
/// Static allocator interface; allocate / deallocate / reallocate are
/// only declared here and defined out of line.
class CudaHostAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Host Allocator"; }

  /// Allocate 'size' bytes.
  static void * allocate( size_t size );

  /// Release 'ptr'; the size argument is unnamed in the declaration.
  static void deallocate( void * ptr , size_t );

  /// Obtain a block of 'new_size' bytes for data currently held in
  /// 'old_ptr' / 'old_size'.
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );
};
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP

View File

@ -1,192 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <sstream>
namespace Kokkos { namespace Impl {
/*--------------------------------------------------------------------------*/
// Create a CUDA texture object that views an existing linear allocation.
//
//   alloc_ptr  : start of the memory to bind (stored as res.linear.devPtr)
//   alloc_size : size of that memory in bytes (res.linear.sizeInBytes)
//   desc       : channel format describing the element type
//
// The resulting handle is stored in m_tex_obj. A failing
// cudaCreateTextureObject is reported through CUDA_SAFE_CALL.
TextureAttribute::TextureAttribute( void * const alloc_ptr
, size_t alloc_size
, cudaChannelFormatDesc const & desc
)
: m_tex_obj(0)
{
// Synchronize the device before and after creating the texture object.
cuda_device_synchronize();
struct cudaResourceDesc resDesc ;
struct cudaTextureDesc texDesc ;
// Zero both descriptors so every field not set below is 0.
memset( & resDesc , 0 , sizeof(resDesc) );
memset( & texDesc , 0 , sizeof(texDesc) );
resDesc.resType = cudaResourceTypeLinear ;
resDesc.res.linear.desc = desc ;
resDesc.res.linear.sizeInBytes = alloc_size ;
resDesc.res.linear.devPtr = alloc_ptr ;
CUDA_SAFE_CALL( cudaCreateTextureObject( & m_tex_obj , & resDesc, & texDesc, NULL) );
cuda_device_synchronize();
}
// Destroy the texture object if one was created; the return code of
// cudaDestroyTextureObject is not inspected.
TextureAttribute::~TextureAttribute()
{
  if ( ! m_tex_obj ) {
    return ;
  }
  cudaDestroyTextureObject( m_tex_obj );
}
/*--------------------------------------------------------------------------*/
// Allocate 'size' bytes with cudaMalloc and return the pointer.
// A non-success return code is reported through CUDA_SAFE_CALL.
void * CudaMallocAllocator::allocate( size_t size )
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMalloc( &ptr, size ) );
return ptr;
}
// Release 'ptr' with cudaFree; the size argument is ignored.
// Best effort: anything thrown while reporting a cudaFree failure is
// caught and discarded, so this function does not propagate exceptions.
void CudaMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFree( ptr ) );
} catch(...) {}
}
// Resize an allocation made by this allocator.
//
// Returns old_ptr unchanged when the sizes are equal. Otherwise allocates
// new_size bytes, copies min(old_size,new_size) bytes with cudaMemcpy
// (cudaMemcpyDefault), releases old_ptr and returns the new pointer.
void * CudaMallocAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
  if ( old_size == new_size ) {
    return old_ptr ;
  }

  void * ptr = allocate( new_size );

  // Copy only what fits in both blocks.
  const size_t copy_size = ( new_size < old_size ) ? new_size : old_size ;
  CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );

  deallocate( old_ptr, old_size );
  return ptr;
}
/*--------------------------------------------------------------------------*/
// Allocate 'size' bytes of managed memory with cudaMallocManaged
// (cudaMemAttachGlobal) and return the pointer.
// When CUDA_VERSION is undefined or below 6000 the managed-memory path is
// compiled out and the call reports an error through
// throw_runtime_exception instead.
void * CudaUVMAllocator::allocate( size_t size )
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION )
void * ptr = NULL;
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, size, cudaMemAttachGlobal ) );
return ptr;
#else
throw_runtime_exception( "CUDA VERSION does not support UVM" );
return NULL;
#endif
}
// Release 'ptr' with cudaFree; the size argument is ignored.
// Best effort: anything thrown while reporting a cudaFree failure is
// caught and discarded, so this function does not propagate exceptions.
void CudaUVMAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFree( ptr ) );
} catch(...) {}
}
// Resize an allocation made by this allocator.
//
// Returns old_ptr unchanged when the sizes are equal. Otherwise allocates
// new_size bytes, copies min(old_size,new_size) bytes with cudaMemcpy
// (cudaMemcpyDefault), releases old_ptr and returns the new pointer.
void * CudaUVMAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
  if ( old_size == new_size ) {
    return old_ptr ;
  }

  void * ptr = allocate( new_size );

  // Copy only what fits in both blocks.
  const size_t copy_size = ( new_size < old_size ) ? new_size : old_size ;
  CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );

  deallocate( old_ptr, old_size );
  return ptr;
}
/*--------------------------------------------------------------------------*/
// Allocate 'size' bytes with cudaHostAlloc (cudaHostAllocDefault) and
// return the pointer. A non-success return code is reported through
// CUDA_SAFE_CALL.
void * CudaHostAllocator::allocate( size_t size )
{
void * ptr = NULL;
CUDA_SAFE_CALL( cudaHostAlloc( &ptr , size , cudaHostAllocDefault ) );
return ptr;
}
// Release 'ptr' with cudaFreeHost; the size argument is ignored.
// Best effort: anything thrown while reporting a cudaFreeHost failure is
// caught and discarded, so this function does not propagate exceptions.
void CudaHostAllocator::deallocate( void * ptr, size_t /*size*/ )
{
try {
CUDA_SAFE_CALL( cudaFreeHost( ptr ) );
} catch(...) {}
}
// Resize an allocation made by this allocator.
//
// Returns old_ptr unchanged when the sizes are equal. Otherwise allocates
// new_size bytes, copies min(old_size,new_size) bytes with cudaMemcpy
// (cudaMemcpyHostToHost), releases old_ptr and returns the new pointer.
void * CudaHostAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
{
  if ( old_size == new_size ) {
    return old_ptr ;
  }

  void * ptr = allocate( new_size );

  // Copy only what fits in both blocks.
  const size_t copy_size = ( new_size < old_size ) ? new_size : old_size ;
  CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyHostToHost ) );

  deallocate( old_ptr, old_size );
  return ptr;
}
/*--------------------------------------------------------------------------*/
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA

View File

@ -1,187 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
#define KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
namespace Kokkos { namespace Impl {
// Cuda 5.0 <texture_types.h> defines 'cudaTextureObject_t'
// to be an 'unsigned long long'. This could change with
// future versions of Cuda and this typedef would have to
// change accordingly.
//
// With CUDA_VERSION >= 5000 the typedef goes through enable_if, so it only
// names a type when ::cudaTextureObject_t has the same size as a pointer.
// Otherwise the handle type falls back to 'const void *'.
#if defined( CUDA_VERSION ) && ( 5000 <= CUDA_VERSION )
typedef enable_if<
sizeof(::cudaTextureObject_t) == sizeof(const void *) ,
::cudaTextureObject_t >::type cuda_texture_object_type ;
#else
typedef const void * cuda_texture_object_type ;
#endif
// Allocator attribute that carries a texture-object handle for an allocation.
// Constructor and destructor are only declared here.
struct TextureAttribute : public AllocatorAttributeBase
{
// Texture-object handle.
cuda_texture_object_type m_tex_obj ;
// alloc_ptr  : start of the allocation
// alloc_size : size of the allocation
// desc       : channel format description for the texture
TextureAttribute( void * const alloc_ptr
, size_t alloc_size
, cudaChannelFormatDesc const & desc
);
~TextureAttribute();
};
/// class CudaUnmanagedAllocator
/// Allocator tag whose deallocate(ptr,size) intentionally does nothing.
struct CudaUnmanagedAllocator
{
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Unmanaged Allocator"; }

  /// No-op: both arguments are ignored.
  static void deallocate( void * , size_t ) {}

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedUVMAllocator
/// Allocator tag whose deallocate(ptr,size) intentionally does nothing.
struct CudaUnmanagedUVMAllocator
{
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Unmanaged UVM Allocator"; }

  /// No-op: both arguments are ignored.
  static void deallocate( void * , size_t ) {}

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaUnmanagedHostAllocator
/// Allocator tag whose deallocate(ptr,size) intentionally does nothing.
class CudaUnmanagedHostAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Unmanaged Host Allocator"; }

  /// Unmanaged deallocate does nothing; both arguments are ignored.
  static void deallocate( void * , size_t ) {}
};
/// class CudaMallocAllocator
/// Static allocator interface; allocate / deallocate / reallocate are
/// only declared here and defined out of line.
class CudaMallocAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Malloc Allocator"; }

  /// Allocate 'size' bytes.
  static void * allocate( size_t size );

  /// Release 'ptr'; the size argument is unnamed in the declaration.
  static void deallocate( void * ptr , size_t );

  /// Obtain a block of 'new_size' bytes for data currently held in
  /// 'old_ptr' / 'old_size'.
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaUVMAllocator
/// Static allocator interface; allocate / deallocate / reallocate are
/// only declared here and defined out of line.
class CudaUVMAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda UVM Allocator"; }

  /// Allocate 'size' bytes.
  static void * allocate( size_t size );

  /// Release 'ptr'; the size argument is unnamed in the declaration.
  static void deallocate( void * ptr , size_t );

  /// Obtain a block of 'new_size' bytes for data currently held in
  /// 'old_ptr' / 'old_size'.
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
/// class CudaHostAllocator
/// Static allocator interface; allocate / deallocate / reallocate are
/// only declared here and defined out of line.
class CudaHostAllocator
{
public:
  /// Human-readable allocator label.
  static const char * name() { return "Cuda Host Allocator"; }

  /// Allocate 'size' bytes.
  static void * allocate( size_t size );

  /// Release 'ptr'; the size argument is unnamed in the declaration.
  static void deallocate( void * ptr , size_t );

  /// Obtain a block of 'new_size' bytes for data currently held in
  /// 'old_ptr' / 'old_size'.
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );
};
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_CUDA_BASIC_ALLOCATORS_HPP

View File

@ -1,69 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_ERROR_HPP
#define KOKKOS_CUDA_ERROR_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
namespace Kokkos { namespace Impl {
// Synchronize with the device (declaration only; defined elsewhere).
void cuda_device_synchronize();
// Report CUDA error 'e' for the call text 'name'; 'file' and 'line'
// optionally identify the call site (declaration only; defined elsewhere).
void cuda_internal_error_throw( cudaError e , const char * name, const char * file = NULL, const int line = 0 );
// Check the result of a CUDA runtime call.
// Does nothing when e == cudaSuccess; otherwise forwards all arguments
// to cuda_internal_error_throw.
inline void cuda_internal_safe_call( cudaError e , const char * name, const char * file = NULL, const int line = 0)
{
  if ( e == cudaSuccess ) {
    return ;
  }
  cuda_internal_error_throw( e , name, file, line );
}
// Wrap a CUDA runtime call: evaluates 'call' once and passes its result to
// cuda_internal_safe_call together with the stringified call text (#call)
// and the file / line of the macro use.
#define CUDA_SAFE_CALL( call ) \
Kokkos::Impl::cuda_internal_safe_call( call , #call, __FILE__, __LINE__ )
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_CUDA_ERROR_HPP

View File

@ -1,678 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/*--------------------------------------------------------------------------*/
/* Kokkos interfaces */
#include <Kokkos_Core.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_Error.hpp>
/*--------------------------------------------------------------------------*/
/* Standard 'C' libraries */
#include <stdlib.h>
/* Standard 'C++' libraries */
#include <vector>
#include <iostream>
#include <sstream>
#include <string>
// When relocatable device code is enabled, this translation unit provides
// the definition of the __constant__ buffer.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
__device__ __constant__
Kokkos::Impl::CudaTraits::ConstantGlobalBufferType
kokkos_impl_cuda_constant_memory_buffer ;
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
namespace {
// Kernel: store the value of __CUDA_ARCH__ this code was compiled for
// into *d_arch, or 0 when the macro is not defined.
__global__
void query_cuda_kernel_arch( int * d_arch )
{
#if defined( __CUDA_ARCH__ )
  const int compiled_arch = __CUDA_ARCH__ ;
#else
  const int compiled_arch = 0 ;
#endif
  *d_arch = compiled_arch ;
}
/** Query what compute capability is actually launched to the device:
 *  allocate one int on the device, let query_cuda_kernel_arch fill it,
 *  copy it back and free the device int.
 *  Return codes of the CUDA calls are not inspected; the result stays 0
 *  if nothing is copied back.
 */
int cuda_kernel_arch()
{
  int * device_result = 0 ;
  cudaMalloc( reinterpret_cast< void ** >( & device_result ) , sizeof(int) );

  query_cuda_kernel_arch<<<1,1>>>( device_result );

  int host_result = 0 ;
  cudaMemcpy( & host_result , device_result , sizeof(int) , cudaMemcpyDefault );

  cudaFree( device_result );
  return host_result ;
}
// Report whether the environment variable CUDA_LAUNCH_BLOCKING is set to a
// value that atoi() parses as non-zero. Unset means false.
bool cuda_launch_blocking()
{
  const char * const setting = getenv("CUDA_LAUNCH_BLOCKING");
  return ( setting != 0 ) && ( atoi( setting ) != 0 );
}
}
// Block until the device has finished, via cudaDeviceSynchronize(); a
// failure is reported through CUDA_SAFE_CALL.
// The commented-out lines are a disabled variant that would have skipped
// the synchronization when cuda_launch_blocking() reported true.
void cuda_device_synchronize()
{
// static const bool launch_blocking = cuda_launch_blocking();
// if (!launch_blocking) {
CUDA_SAFE_CALL( cudaDeviceSynchronize() );
// }
}
// Build the message
//   "<name> error( <cudaGetErrorName>): <cudaGetErrorString>[ <file>:<line>]"
// and hand it to throw_runtime_exception. The file/line suffix is only
// appended when 'file' is non-null.
void cuda_internal_error_throw( cudaError e , const char * name, const char * file, const int line )
{
  std::ostringstream message ;
  message << name << " error( " << cudaGetErrorName(e) << "): " << cudaGetErrorString(e);

  if ( file != 0 ) {
    message << " " << file << ":" << line;
  }

  throw_runtime_exception( message.str() );
}
//----------------------------------------------------------------------------
// Some significant cuda device properties:
//
// cudaDeviceProp::name : Text label for device
// cudaDeviceProp::major : Device major number
// cudaDeviceProp::minor : Device minor number
// cudaDeviceProp::warpSize : number of threads per warp
// cudaDeviceProp::multiProcessorCount : number of multiprocessors
// cudaDeviceProp::sharedMemPerBlock : capacity of shared memory per block
// cudaDeviceProp::totalConstMem : capacity of constant memory
// cudaDeviceProp::totalGlobalMem : capacity of global memory
// cudaDeviceProp::maxGridSize[3] : maximum grid size
//
// Section 4.4.2.4 of the CUDA Toolkit Reference Manual
//
// struct cudaDeviceProp {
// char name[256];
// size_t totalGlobalMem;
// size_t sharedMemPerBlock;
// int regsPerBlock;
// int warpSize;
// size_t memPitch;
// int maxThreadsPerBlock;
// int maxThreadsDim[3];
// int maxGridSize[3];
// size_t totalConstMem;
// int major;
// int minor;
// int clockRate;
// size_t textureAlignment;
// int deviceOverlap;
// int multiProcessorCount;
// int kernelExecTimeoutEnabled;
// int integrated;
// int canMapHostMemory;
// int computeMode;
// int concurrentKernels;
// int ECCEnabled;
// int pciBusID;
// int pciDeviceID;
// int tccDriver;
// int asyncEngineCount;
// int unifiedAddressing;
// int memoryClockRate;
// int memoryBusWidth;
// int l2CacheSize;
// int maxThreadsPerMultiProcessor;
// };
namespace {
// Table of the properties of the CUDA devices found at construction time,
// reached through singleton().
class CudaInternalDevices {
public:
// Capacity of m_cudaProp.
enum { MAXIMUM_DEVICE_COUNT = 8 };
// Properties per device, indexed by device id.
struct cudaDeviceProp m_cudaProp[ MAXIMUM_DEVICE_COUNT ] ;
// Device count as reported by cudaGetDeviceCount.
int m_cudaDevCount ;
CudaInternalDevices();
static const CudaInternalDevices & singleton();
};
// Query the number of CUDA devices and cache the properties of each one.
//
// Failures of the CUDA queries are reported through CUDA_SAFE_CALL.
// A device count larger than MAXIMUM_DEVICE_COUNT is reported through
// throw_runtime_exception before anything is written to m_cudaProp, because
// the fixed-size table cannot hold the extra entries.
CudaInternalDevices::CudaInternalDevices()
{
  // See 'cudaSetDeviceFlags' for host-device thread interaction
  // Section 4.4.2.6 of the CUDA Toolkit Reference Manual

  CUDA_SAFE_CALL (cudaGetDeviceCount( & m_cudaDevCount ) );

  // Guard the fixed-size property table against overflow.
  if ( m_cudaDevCount > int(MAXIMUM_DEVICE_COUNT) ) {
    std::ostringstream msg ;
    msg << "Kokkos::Cuda ERROR: detected " << m_cudaDevCount
        << " devices but at most " << int(MAXIMUM_DEVICE_COUNT)
        << " are supported" ;
    throw_runtime_exception( msg.str() );
  }

  for ( int i = 0 ; i < m_cudaDevCount ; ++i ) {
    CUDA_SAFE_CALL( cudaGetDeviceProperties( m_cudaProp + i , i ) );
  }
}
// Return the single shared device table, a function-local static that is
// constructed on the first call.
const CudaInternalDevices & CudaInternalDevices::singleton()
{
  static CudaInternalDevices instance ;
  return instance ;
}
}
//----------------------------------------------------------------------------
// Internal state of the Kokkos Cuda back end: the selected device, its
// limits, scratch buffers and streams. Accessed through singleton().
class CudaInternal {
private:
// Copy construction and assignment are declared private; no definitions
// are visible here (presumably to forbid copying — confirm).
CudaInternal( const CudaInternal & );
CudaInternal & operator = ( const CudaInternal & );
// Trackers associated with the three scratch buffers below.
AllocationTracker m_scratchFlagsTracker;
AllocationTracker m_scratchSpaceTracker;
AllocationTracker m_scratchUnifiedTracker;
public:
typedef Cuda::size_type size_type ;
// Selected device id; negative means "not initialized"
// (see verify_is_initialized).
int m_cudaDev ;
int m_cudaArch ;
unsigned m_maxWarpCount ;
unsigned m_maxBlock ;
unsigned m_maxSharedWords ;
// Current sizes of the scratch buffers (units set by the scratch_* functions).
size_type m_scratchSpaceCount ;
size_type m_scratchFlagsCount ;
size_type m_scratchUnifiedCount ;
size_type m_scratchUnifiedSupported ;
size_type m_streamCount ;
// Scratch buffers and stream array; all null until initialized.
size_type * m_scratchSpace ;
size_type * m_scratchFlags ;
size_type * m_scratchUnified ;
cudaStream_t * m_stream ;
static CudaInternal & singleton();
// Returns non-zero when a device has been selected; otherwise prints an
// error mentioning 'label' and returns 0.
int verify_is_initialized( const char * const label ) const ;
// True once both m_scratchSpace and m_scratchFlags are non-null.
int is_initialized() const
{ return 0 != m_scratchSpace && 0 != m_scratchFlags ; }
void initialize( int cuda_device_id , int stream_count );
void finalize();
void print_configuration( std::ostream & ) const ;
~CudaInternal();
// Starts in the "not initialized" state: device and arch are -1, all
// counts are 0 and all pointers are null.
CudaInternal()
: m_cudaDev( -1 )
, m_cudaArch( -1 )
, m_maxWarpCount( 0 )
, m_maxBlock( 0 )
, m_maxSharedWords( 0 )
, m_scratchSpaceCount( 0 )
, m_scratchFlagsCount( 0 )
, m_scratchUnifiedCount( 0 )
, m_scratchUnifiedSupported( 0 )
, m_streamCount( 0 )
, m_scratchSpace( 0 )
, m_scratchFlags( 0 )
, m_scratchUnified( 0 )
, m_stream( 0 )
{}
// Accessors for the scratch buffers, parameterized by a requested size.
size_type * scratch_space( const size_type size );
size_type * scratch_flags( const size_type size );
size_type * scratch_unified( const size_type size );
};
//----------------------------------------------------------------------------
void CudaInternal::print_configuration( std::ostream & s ) const
{
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
#if defined( KOKKOS_HAVE_CUDA )
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
#endif
#if defined( CUDA_VERSION )
s << "macro CUDA_VERSION = " << CUDA_VERSION
<< " = version " << CUDA_VERSION / 1000
<< "." << ( CUDA_VERSION % 1000 ) / 10
<< std::endl ;
#endif
for ( int i = 0 ; i < dev_info.m_cudaDevCount ; ++i ) {
s << "Kokkos::Cuda[ " << i << " ] "
<< dev_info.m_cudaProp[i].name
<< " capability " << dev_info.m_cudaProp[i].major << "." << dev_info.m_cudaProp[i].minor
<< ", Total Global Memory: " << human_memory_size(dev_info.m_cudaProp[i].totalGlobalMem)
<< ", Shared Memory per Block: " << human_memory_size(dev_info.m_cudaProp[i].sharedMemPerBlock);
if ( m_cudaDev == i ) s << " : Selected" ;
s << std::endl ;
}
}
//----------------------------------------------------------------------------
// Destructor.  It does not release anything itself: if any stream table or
// scratch buffer pointer is still set, finalize() was not called, and that
// is reported on std::cerr.  All members are then returned to their
// "not initialized" values.
CudaInternal::~CudaInternal()
{
  const bool finalize_was_skipped =
    ( 0 != m_stream )       ||
    ( 0 != m_scratchSpace ) ||
    ( 0 != m_scratchFlags ) ||
    ( 0 != m_scratchUnified );

  if ( finalize_was_skipped ) {
    std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()"
              << std::endl ;
    std::cerr.flush();
  }

  // Buffers and stream table.
  m_stream         = 0 ;
  m_scratchUnified = 0 ;
  m_scratchFlags   = 0 ;
  m_scratchSpace   = 0 ;

  // Capacities and counts.
  m_streamCount             = 0 ;
  m_scratchUnifiedSupported = 0 ;
  m_scratchUnifiedCount     = 0 ;
  m_scratchFlagsCount       = 0 ;
  m_scratchSpaceCount       = 0 ;

  // Device selection and launch limits.
  m_maxSharedWords = 0 ;
  m_maxBlock       = 0 ;
  m_maxWarpCount   = 0 ;
  m_cudaArch       = -1 ;
  m_cudaDev        = -1 ;
}
// Checks that a device has been selected (m_cudaDev is non-negative).
// Returns 1 when it has; otherwise writes an error naming 'label' to
// std::cerr and returns 0.
int CudaInternal::verify_is_initialized( const char * const label ) const
{
  const bool device_selected = ( 0 <= m_cudaDev );

  if ( ! device_selected ) {
    std::cerr << "Kokkos::Cuda::" << label << " : ERROR device not initialized" << std::endl ;
  }

  return device_selected ;
}
// Returns the one process-wide CudaInternal object, constructed on the
// first call as a function-local static.
CudaInternal & CudaInternal::singleton()
{
  static CudaInternal instance ;

  return instance ;
}
// Selects CUDA device 'cuda_device_id', resets it, records its launch limits
// and allocates the initial scratch buffers and the stream table.
//
//   cuda_device_id : index into the detected-device table; must lie in
//                    [0, m_cudaDevCount) and name a device of compute
//                    capability 2.0 or better.
//   stream_count   : number of stream slots to allocate.  Zero allocates
//                    none; a negative value is rejected.
//
// Throws through throw_runtime_exception when the host execution space is
// not initialized, when this object is already initialized, when an argument
// is invalid, or when the stream table cannot be allocated.
void CudaInternal::initialize( int cuda_device_id , int stream_count )
{
  enum { WordSize = sizeof(size_type) };

  if ( ! HostSpace::execution_space::is_initialized() ) {
    const std::string msg("Cuda::initialize ERROR : HostSpace::execution_space is not initialized");
    throw_runtime_exception( msg );
  }

  const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();

  // True while not yet initialized: the negation of is_initialized().
  const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ;

  const bool ok_id   = 0 <= cuda_device_id &&
                            cuda_device_id < dev_info.m_cudaDevCount ;

  // Need device capability 2.0 or better
  const bool ok_dev = ok_id &&
    ( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major &&
      0 <= dev_info.m_cudaProp[ cuda_device_id ].minor );

  // A negative count would be converted to a huge unsigned size in the
  // stream-table allocation below, so reject it up front.
  const bool ok_streams = 0 <= stream_count ;

  if ( ok_init && ok_dev && ok_streams ) {

    const struct cudaDeviceProp & cudaProp =
      dev_info.m_cudaProp[ cuda_device_id ];

    m_cudaDev = cuda_device_id ;

    CUDA_SAFE_CALL( cudaSetDevice( m_cudaDev ) );
    CUDA_SAFE_CALL( cudaDeviceReset() );
    Kokkos::Impl::cuda_device_synchronize();

    // Query what compute capability architecture a kernel executes:
    m_cudaArch = cuda_kernel_arch();

    if ( m_cudaArch != cudaProp.major * 100 + cudaProp.minor * 10 ) {
      std::cerr << "Kokkos::Cuda::initialize WARNING: running kernels compiled for compute capability "
                << ( m_cudaArch / 100 ) << "." << ( ( m_cudaArch % 100 ) / 10 )
                << " on device with compute capability "
                << cudaProp.major << "." << cudaProp.minor
                << " , this will likely reduce potential performance."
                << std::endl ;
    }

    //----------------------------------
    // Maximum number of warps,
    // at most one warp per thread in a warp for reduction.

    // HCE 2012-February :
    // Found bug in CUDA 4.1 that sometimes a kernel launch would fail
    // if the thread count == 1024 and a functor is passed to the kernel.
    // Copying the kernel to constant memory and then launching with
    // thread count == 1024 would work fine.
    //
    // HCE 2012-October :
    // All compute capabilities support at least 16 warps (512 threads).
    // However, we have found that 8 warps typically gives better performance.

    m_maxWarpCount = 8 ;

    // m_maxWarpCount = cudaProp.maxThreadsPerBlock / Impl::CudaTraits::WarpSize ;

    if ( Impl::CudaTraits::WarpSize < m_maxWarpCount ) {
      m_maxWarpCount = Impl::CudaTraits::WarpSize ;
    }

    m_maxSharedWords = cudaProp.sharedMemPerBlock / WordSize ;

    //----------------------------------
    // Maximum number of blocks:

    m_maxBlock = m_cudaArch < 300 ? 65535 : cudaProp.maxGridSize[0] ;

    //----------------------------------

    m_scratchUnifiedSupported = cudaProp.unifiedAddressing ;

    if ( ! m_scratchUnifiedSupported ) {
      std::cout << "Kokkos::Cuda device "
                << cudaProp.name << " capability "
                << cudaProp.major << "." << cudaProp.minor
                << " does not support unified virtual address space"
                << std::endl ;
    }

    //----------------------------------
    // Multiblock reduction uses scratch flags for counters
    // and scratch space for partial reduction values.
    // Allocate some initial space.  This will grow as needed.

    {
      const unsigned reduce_block_count = m_maxWarpCount * Impl::CudaTraits::WarpSize ;

      (void) scratch_unified( 16 * sizeof(size_type) );
      (void) scratch_flags( reduce_block_count * 2 * sizeof(size_type) );
      (void) scratch_space( reduce_block_count * 16 * sizeof(size_type) );
    }

    //----------------------------------

    if ( 0 < stream_count ) {
      m_stream = static_cast<cudaStream_t*>( ::malloc( stream_count * sizeof(cudaStream_t) ) );

      // Never write through a failed allocation.  The scratch buffers are
      // already in place at this point, so finalize() can still clean up.
      if ( 0 == m_stream ) {
        const std::string msg("Kokkos::Cuda::initialize ERROR : failed to allocate stream table");
        throw_runtime_exception( msg );
      }

      m_streamCount = stream_count ;
      for ( size_type i = 0 ; i < m_streamCount ; ++i ) m_stream[i] = 0 ;
    }
  }
  else {

    std::ostringstream msg ;
    msg << "Kokkos::Cuda::initialize(" << cuda_device_id << ") FAILED" ;

    if ( ! ok_init ) {
      msg << " : Already initialized" ;
    }
    if ( ! ok_id ) {
      // Half-open range: m_cudaDevCount itself is not a valid identifier.
      msg << " : Device identifier out of range "
          << "[0.." << dev_info.m_cudaDevCount << ")" ;
    }
    else if ( ! ok_dev ) {
      msg << " : Device " ;
      msg << dev_info.m_cudaProp[ cuda_device_id ].major ;
      msg << "." ;
      msg << dev_info.m_cudaProp[ cuda_device_id ].minor ;
      msg << " has insufficient capability, required 2.0 or better" ;
    }
    if ( ! ok_streams ) {
      msg << " : Negative stream count " << stream_count ;
    }
    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  // Initialize the lock array used for arbitrarily sized atomics
  Impl::init_lock_array_cuda_space();
}
//----------------------------------------------------------------------------
// Scratch buffers are sized in whole "grains": one grain is an array of
// Impl::CudaTraits::WarpSize values of type Cuda::size_type.
typedef Cuda::size_type ScratchGrain[ Impl::CudaTraits::WarpSize ] ;
// Size of one grain in bytes.
enum { sizeScratchGrain = sizeof(ScratchGrain) };
// Returns the scratch-flags buffer, making sure it holds at least 'size'
// bytes.  When a device is selected and the current capacity is too small,
// a new zero-filled buffer of whole grains is allocated and tracked; the
// previous contents are not carried over.
Cuda::size_type *
CudaInternal::scratch_flags( const Cuda::size_type size )
{
  // Nothing to do when no device is selected or the buffer is big enough.
  if ( ! ( verify_is_initialized("scratch_flags") &&
           m_scratchFlagsCount * sizeScratchGrain < size ) ) {
    return m_scratchFlags ;
  }

  // Round the request up to a whole number of grains.
  m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

  m_scratchFlagsTracker =
    CudaSpace::allocate_and_track( std::string("InternalScratchFlags") ,
                                   sizeof( ScratchGrain ) * m_scratchFlagsCount );

  m_scratchFlags = reinterpret_cast<size_type *>(m_scratchFlagsTracker.alloc_ptr());

  CUDA_SAFE_CALL( cudaMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain ) );

  return m_scratchFlags ;
}
// Returns the scratch-space buffer, making sure it holds at least 'size'
// bytes.  When a device is selected and the current capacity is too small,
// a new buffer of whole grains is allocated and tracked; the previous
// contents are not carried over and the new buffer is not cleared here.
Cuda::size_type *
CudaInternal::scratch_space( const Cuda::size_type size )
{
  // Nothing to do when no device is selected or the buffer is big enough.
  if ( ! ( verify_is_initialized("scratch_space") &&
           m_scratchSpaceCount * sizeScratchGrain < size ) ) {
    return m_scratchSpace ;
  }

  // Round the request up to a whole number of grains.
  m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

  m_scratchSpaceTracker =
    CudaSpace::allocate_and_track( std::string("InternalScratchSpace") ,
                                   sizeof( ScratchGrain ) * m_scratchSpaceCount );

  m_scratchSpace = reinterpret_cast<size_type *>(m_scratchSpaceTracker.alloc_ptr());

  return m_scratchSpace ;
}
// Returns the unified (host-pinned) scratch buffer, making sure it holds at
// least 'size' bytes.  Growth only happens when a device is selected, the
// device reported unified addressing, and the current capacity is too
// small; otherwise the existing pointer (possibly null) is returned as is.
Cuda::size_type *
CudaInternal::scratch_unified( const Cuda::size_type size )
{
  if ( ! ( verify_is_initialized("scratch_unified") &&
           m_scratchUnifiedSupported &&
           m_scratchUnifiedCount * sizeScratchGrain < size ) ) {
    return m_scratchUnified ;
  }

  // Round the request up to a whole number of grains.
  m_scratchUnifiedCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

  m_scratchUnifiedTracker =
    CudaHostPinnedSpace::allocate_and_track( std::string("InternalScratchUnified") ,
                                             sizeof( ScratchGrain ) * m_scratchUnifiedCount );

  m_scratchUnified = reinterpret_cast<size_type *>( m_scratchUnifiedTracker.alloc_ptr() );

  return m_scratchUnified ;
}
//----------------------------------------------------------------------------
void CudaInternal::finalize()
{
if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) {
lock_array_cuda_space_ptr(true);
if ( m_stream ) {
for ( size_type i = 1 ; i < m_streamCount ; ++i ) {
cudaStreamDestroy( m_stream[i] );
m_stream[i] = 0 ;
}
::free( m_stream );
}
m_scratchSpaceTracker.clear();
m_scratchFlagsTracker.clear();
m_scratchUnifiedTracker.clear();
m_cudaDev = -1 ;
m_maxWarpCount = 0 ;
m_maxBlock = 0 ;
m_maxSharedWords = 0 ;
m_scratchSpaceCount = 0 ;
m_scratchFlagsCount = 0 ;
m_scratchUnifiedCount = 0 ;
m_streamCount = 0 ;
m_scratchSpace = 0 ;
m_scratchFlags = 0 ;
m_scratchUnified = 0 ;
m_stream = 0 ;
}
}
//----------------------------------------------------------------------------
// Free-function accessors that forward to the CudaInternal singleton.

// Warp-count limit currently recorded in the singleton.
Cuda::size_type cuda_internal_maximum_warp_count()
{
  const CudaInternal & internal = CudaInternal::singleton();
  return internal.m_maxWarpCount ;
}

// Block (grid) count limit currently recorded in the singleton.
Cuda::size_type cuda_internal_maximum_grid_count()
{
  const CudaInternal & internal = CudaInternal::singleton();
  return internal.m_maxBlock ;
}

// Shared-memory limit, in size_type words, currently recorded in the singleton.
Cuda::size_type cuda_internal_maximum_shared_words()
{
  const CudaInternal & internal = CudaInternal::singleton();
  return internal.m_maxSharedWords ;
}

// Scratch-space buffer able to hold at least 'size' bytes.
Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size )
{
  CudaInternal & internal = CudaInternal::singleton();
  return internal.scratch_space( size );
}

// Scratch-flags buffer able to hold at least 'size' bytes.
Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size )
{
  CudaInternal & internal = CudaInternal::singleton();
  return internal.scratch_flags( size );
}

// Unified scratch buffer able to hold at least 'size' bytes.
Cuda::size_type * cuda_internal_scratch_unified( const Cuda::size_type size )
{
  CudaInternal & internal = CudaInternal::singleton();
  return internal.scratch_unified( size );
}
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
Cuda::size_type Cuda::detect_device_count()
{ return Impl::CudaInternalDevices::singleton().m_cudaDevCount ; }
int Cuda::is_initialized()
{ return Impl::CudaInternal::singleton().is_initialized(); }
void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances )
{ Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); }
std::vector<unsigned>
Cuda::detect_device_arch()
{
const Impl::CudaInternalDevices & s = Impl::CudaInternalDevices::singleton();
std::vector<unsigned> output( s.m_cudaDevCount );
for ( int i = 0 ; i < s.m_cudaDevCount ; ++i ) {
output[i] = s.m_cudaProp[i].major * 100 + s.m_cudaProp[i].minor ;
}
return output ;
}
// Architecture code (major * 100 + minor) of the currently selected device,
// or 0 when no device is selected.
Cuda::size_type Cuda::device_arch()
{
  const int selected = Impl::CudaInternal::singleton().m_cudaDev ;

  if ( selected < 0 ) {
    return 0 ;
  }

  const struct cudaDeviceProp & prop =
    Impl::CudaInternalDevices::singleton().m_cudaProp[ selected ] ;

  const int arch_code = prop.major * 100 + prop.minor ;

  return arch_code ;
}
void Cuda::finalize()
{ Impl::CudaInternal::singleton().finalize(); }
// Default instance: bound to the currently selected device and to stream
// handle 0.  If no device is selected, an error is printed to std::cerr by
// verify_is_initialized, and construction still completes.
Cuda::Cuda()
  : m_device( Impl::CudaInternal::singleton().m_cudaDev )
  , m_stream( 0 )
{
  Impl::CudaInternal & internal = Impl::CudaInternal::singleton();
  internal.verify_is_initialized( "Cuda instance constructor" );
}
// Instance bound to the currently selected device and to the stream slot
// selected by 'instance_id' modulo the number of allocated slots.
//
// Falls back to stream handle 0 when no device is selected (an error is
// printed by verify_is_initialized) or when no stream table was allocated.
// The latter guard avoids a modulo by zero and a null-pointer index, which
// would otherwise occur when initialize() was called with zero streams.
Cuda::Cuda( const int instance_id )
  : m_device( Impl::CudaInternal::singleton().m_cudaDev )
  , m_stream(
      ( Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ) &&
        0 != Impl::CudaInternal::singleton().m_stream &&
        0 != Impl::CudaInternal::singleton().m_streamCount )
      ? Impl::CudaInternal::singleton().m_stream[ instance_id % Impl::CudaInternal::singleton().m_streamCount ]
      : 0 )
{}
void Cuda::print_configuration( std::ostream & s , const bool )
{ Impl::CudaInternal::singleton().print_configuration( s ); }
bool Cuda::sleep() { return false ; }
bool Cuda::wake() { return true ; }
void Cuda::fence()
{
Kokkos::Impl::cuda_device_synchronize();
}
} // namespace Kokkos
#endif // KOKKOS_HAVE_CUDA
//----------------------------------------------------------------------------

View File

@ -1,165 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_INTERNAL_HPP
#define KOKKOS_CUDA_INTERNAL_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
namespace Kokkos { namespace Impl {
// Finds a large power-of-two block size for launching DriverType.
//
// With CUDA older than 6.5 a fixed 256 is returned.  Otherwise the block
// size starts at 32 and is doubled while it is below 1024 and the occupancy
// query still reports at least one active block per multiprocessor.  The
// last size tried is returned if it was still launchable, otherwise half of
// it.  The kernel entry point queried depends on whether sizeof(DriverType)
// exceeds CudaTraits::ConstantMemoryUseThreshold.
template<class DriverType>
int cuda_get_max_block_size(const typename DriverType::functor_type & f) {
#if ( CUDA_VERSION < 6050 )
  return 256;
#else
  typedef FunctorTeamShmemSize< typename DriverType::functor_type > shmem_size ;

  const bool use_constant_memory =
    ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );

  int active_blocks;
  int threads = 32;

  if ( use_constant_memory ) {
    int shmem_bytes = shmem_size::value( f , threads );
    cudaOccupancyMaxActiveBlocksPerMultiprocessor(
      &active_blocks,
      cuda_parallel_launch_constant_memory<DriverType>,
      threads,
      shmem_bytes);

    while ( threads < 1024 && active_blocks > 0 ) {
      threads *= 2;
      shmem_bytes = shmem_size::value( f , threads );
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
        &active_blocks,
        cuda_parallel_launch_constant_memory<DriverType>,
        threads,
        shmem_bytes);
    }
  }
  else {
    int shmem_bytes = shmem_size::value( f , threads );
    cudaOccupancyMaxActiveBlocksPerMultiprocessor(
      &active_blocks,
      cuda_parallel_launch_local_memory<DriverType>,
      threads,
      shmem_bytes);

    while ( threads < 1024 && active_blocks > 0 ) {
      threads *= 2;
      shmem_bytes = shmem_size::value( f , threads );
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
        &active_blocks,
        cuda_parallel_launch_local_memory<DriverType>,
        threads,
        shmem_bytes);
    }
  }

  // Step back one doubling if the last size tried could not be launched.
  return ( active_blocks > 0 ) ? threads : threads / 2;
#endif
}
// Picks the power-of-two block size in {32, 64, ..., 1024} that maximizes
// (active blocks per multiprocessor) * (block size) for DriverType.
//
// With CUDA older than 6.5 a fixed 256 is returned.  Ties keep the smaller
// block size because only a strictly larger product replaces the best one.
// Returns 0 if no candidate yields a positive product.
template<class DriverType>
int cuda_get_opt_block_size(const typename DriverType::functor_type & f) {
#if ( CUDA_VERSION < 6050 )
  return 256;
#else
  typedef FunctorTeamShmemSize< typename DriverType::functor_type > shmem_size ;

  const bool use_constant_memory =
    ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );

  int active_blocks;
  int shmem_bytes;
  int best_occupancy = 0;
  int best_threads   = 0;

  for ( int threads = 32 ; threads <= 1024 ; threads *= 2 ) {
    // Shared memory demand of the functor at this candidate size.
    shmem_bytes = shmem_size::value( f , threads );

    if ( use_constant_memory ) {
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
        &active_blocks,
        cuda_parallel_launch_constant_memory<DriverType>,
        threads,
        shmem_bytes);
    }
    else {
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
        &active_blocks,
        cuda_parallel_launch_local_memory<DriverType>,
        threads,
        shmem_bytes);
    }

    // Keep this candidate only if it beats the best product found so far.
    if ( best_occupancy < active_blocks * threads ) {
      best_occupancy = active_blocks * threads;
      best_threads   = threads;
    }
  }

  return best_threads;
#endif
}
}} // namespace Kokkos::Impl
#endif // KOKKOS_HAVE_CUDA
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,424 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_REDUCESCAN_HPP
#define KOKKOS_CUDA_REDUCESCAN_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#include <utility>
#include <Kokkos_Parallel.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//Shfl based reductions
/*
* Algorithmic constraints:
* (a) threads with same threadIdx.y have same value
* (b) blockDim.x == power of two
* (c) blockDim.z == 1
*/
// Combines, within a 32-wide shuffle group, the per-thread 'result' values
// using 'join', then replaces every thread's 'result' with the value
// obtained from lane 0 via shfl.
//
//   result            : in/out, this thread's contribution / combined value
//   join              : callable invoked as join(result, tmp)
//   max_active_thread : contributions from threadIdx.y values at or beyond
//                       this bound are not joined; defaults to blockDim.y
template< class ValueType , class JoinOp>
__device__
inline void cuda_intra_warp_reduction( ValueType& result,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
unsigned int shift = 1;
//Reduce over values from threads with different threadIdx.y
// The shuffle offset is blockDim.x*shift and doubles each pass until it
// reaches 32.
while(blockDim.x * shift < 32 ) {
const ValueType tmp = shfl_down(result, blockDim.x*shift,32u);
//Only join if upper thread is active (this allows non power of two for blockDim.y)
if(threadIdx.y + shift < max_active_thread)
join(result , tmp);
shift*=2;
}
// shfl is defined elsewhere; presumably this broadcasts lane 0's value to
// the whole group -- confirm against its definition.
result = shfl(result,0,32);
}
// Combines per-warp values across the block through a small statically
// sized shared-memory buffer of STEP_WIDTH ValueType slots.
//
//   value             : in/out, this thread's value / the combined value
//   join              : callable invoked as join(destination, source)
//   max_active_thread : bound on participating threadIdx.y values;
//                       defaults to blockDim.y
//
// NOTE(review): STEP_WIDTH is #define'd here and no matching #undef appears
// in this header, so the macro stays visible to code that includes it.
template< class ValueType , class JoinOp>
__device__
inline void cuda_inter_warp_reduction( ValueType& value,
const JoinOp& join,
const int max_active_thread = blockDim.y) {
#define STEP_WIDTH 4
// Raw char storage reinterpreted as STEP_WIDTH ValueType slots.
__shared__ char sh_result[sizeof(ValueType)*STEP_WIDTH];
ValueType* result = (ValueType*) & sh_result;
// Number of threadIdx.y values covered by one 32-thread group.
const unsigned step = 32 / blockDim.x;
unsigned shift = STEP_WIDTH;
// Threads whose threadIdx.y is a multiple of 'step' get a slot id; all
// others get the sentinel 65000, which fails every slot test below.
const int id = threadIdx.y%step==0?threadIdx.y/step:65000;
// The first STEP_WIDTH ids seed the shared slots with their own value.
if(id < STEP_WIDTH ) {
result[id] = value;
}
__syncthreads();
// Each pass folds the next STEP_WIDTH ids into slot id%STEP_WIDTH, with a
// block-wide barrier after every pass.
while (shift<=max_active_thread/step) {
if(shift<=id && shift+STEP_WIDTH>id && threadIdx.x==0) {
join(result[id%STEP_WIDTH],value);
}
__syncthreads();
shift+=STEP_WIDTH;
}
// Every thread combines the slots that are in use into its own 'value'.
value = result[0];
for(int i = 1; (i*step<=max_active_thread) && i<STEP_WIDTH; i++)
join(value,result[i]);
}
// Block-wide reduction of 'value' using 'join', done in two stages: first
// the intra-warp shuffle reduction, then the inter-warp reduction through
// shared memory.  'max_active_thread' is forwarded unchanged to both stages
// and defaults to blockDim.y.
template< class ValueType , class JoinOp>
__device__
inline void cuda_intra_block_reduction( ValueType& value,
                                        const JoinOp& join,
                                        const int max_active_thread = blockDim.y) {
  // Stage 1: combine values inside each warp.
  cuda_intra_warp_reduction( value , join , max_active_thread );

  // Stage 2: combine the per-warp results across the block.
  cuda_inter_warp_reduction( value , join , max_active_thread );
}
// Reduces 'value' across all blocks of the grid (indexed by blockIdx.x).
//
// Each block first reduces internally, its thread 0 stores the block result
// in m_scratch_space[blockIdx.x], and an atomic counter in m_scratch_flags
// identifies the block that finishes last.  The first 32 threads of that
// block then combine all stored block results.
//
//   value             : in/out, per-thread contribution / reduced value
//   join              : callable invoked as join(destination, source)
//   m_scratch_space   : global buffer holding one value_type per block
//   result            : not referenced in this function body
//   m_scratch_flags   : global counter; reset to zero by the last block
//   max_active_thread : forwarded to the intra-block reduction
//
// Returns true only for threads with linear id < 32 in the last block to
// arrive; all other threads return false.
template< class FunctorType , class JoinOp>
__device__
bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , void >::reference_type value,
const JoinOp& join,
Cuda::size_type * const m_scratch_space,
typename FunctorValueTraits< FunctorType , void >::pointer_type const result,
Cuda::size_type * const m_scratch_flags,
const int max_active_thread = blockDim.y) {
typedef typename FunctorValueTraits< FunctorType , void >::pointer_type pointer_type;
typedef typename FunctorValueTraits< FunctorType , void >::value_type value_type;
//Do the intra-block reduction with shfl operations and static shared memory
cuda_intra_block_reduction(value,join,max_active_thread);
// Linear thread id within the block.
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
//One thread in the block writes block result to global scratch_memory
if(id == 0 ) {
pointer_type global = ((pointer_type) m_scratch_space) + blockIdx.x;
*global = value;
}
//One warp of last block performs inter block reduction through loading the block values from global scratch_memory
bool last_block = false;
__syncthreads();
if ( id < 32 ) {
Cuda::size_type count;
//Figure out whether this is the last block
// Only thread 0 touches the counter; the shfl below hands its result to the
// other threads of the group.
if(id == 0)
count = Kokkos::atomic_fetch_add(m_scratch_flags,1);
count = Kokkos::shfl(count,0,32);
//Last block does the inter block reduction
if( count == gridDim.x - 1) {
//set flag back to zero
if(id == 0)
*m_scratch_flags = 0;
last_block = true;
// NOTE(review): assumes value_type is assignable from 0 and that 0 is the
// identity element for 'join' -- confirm for non-sum reductions.
value = 0;
pointer_type const volatile global = (pointer_type) m_scratch_space ;
//Reduce all global values with splitting work over threads in one warp
const int step_size = blockDim.x*blockDim.y < 32 ? blockDim.x*blockDim.y : 32;
for(int i=id; i<gridDim.x; i+=step_size) {
value_type tmp = global[i];
join(value, tmp);
}
//Perform shfl reductions within the warp only join if contribution is valid (allows gridDim.x non power of two and <32)
// Unrolled tree with shuffle offsets 1, 2, 4, 8, 16.
if (blockDim.x*blockDim.y > 1) {
value_type tmp = Kokkos::shfl_down(value, 1,32);
if( id + 1 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 2) {
value_type tmp = Kokkos::shfl_down(value, 2,32);
if( id + 2 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 4) {
value_type tmp = Kokkos::shfl_down(value, 4,32);
if( id + 4 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 8) {
value_type tmp = Kokkos::shfl_down(value, 8,32);
if( id + 8 < gridDim.x )
join(value, tmp);
}
if (blockDim.x*blockDim.y > 16) {
value_type tmp = Kokkos::shfl_down(value, 16,32);
if( id + 16 < gridDim.x )
join(value, tmp);
}
}
}
//The last block has in its thread=0 the global reduction value through "value"
return last_block;
}
//----------------------------------------------------------------------------
// See section B.17 of Cuda C Programming Guide Version 3.2
// for discussion of
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
// function qualifier which could be used to improve performance.
//----------------------------------------------------------------------------
// Maximize shared memory and minimize L1 cache:
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferShared );
// For 2.0 capability: 48 KB shared and 16 KB L1
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/*
* Reduce (and, when DoScan is true, scan) the per-thread values stored
* contiguously at 'base_data', value_count entries per thread, indexed by
* threadIdx.y.  The work is done in place using the functor's join.
*
* Algorithmic constraints:
* (a) blockDim.y is a power of two
* (b) blockDim.y <= 512
* (c) blockDim.x == blockDim.z == 1
*/
template< bool DoScan , class FunctorType , class ArgTag >
__device__
void cuda_intra_block_reduce_scan( const FunctorType & functor ,
const typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type base_data )
{
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
const unsigned value_count = ValueTraits::value_count( functor );
const unsigned BlockSizeMask = blockDim.y - 1 ;
// Must have power of two thread count
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_intra_block_scan requires power-of-two blockDim"); }
// BLOCK_REDUCE_STEP: when the low S+1 bits of R are all zero, join into TD
// the entry that lies (1<<S) thread slots below it.
#define BLOCK_REDUCE_STEP( R , TD , S ) \
if ( ! ( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); }
// BLOCK_SCAN_STEP: when N equals (1<<S), join into TD the entry that lies
// (1<<S) thread slots below it.
#define BLOCK_SCAN_STEP( TD , N , S ) \
if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); }
// Reversed thread index: the last thread (threadIdx.y == BlockSizeMask)
// gets 0.
const unsigned rtid_intra = threadIdx.y ^ BlockSizeMask ;
// This thread's own slot in the data array.
const pointer_type tdata_intra = base_data + value_count * threadIdx.y ;
// NOTE(review): the five steps below run without a barrier between them;
// presumably this relies on the threads of a warp executing in lockstep --
// confirm for the targeted architectures.
{ // Intra-warp reduction:
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3)
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4)
}
__syncthreads(); // Wait for all warps to reduce
{ // Inter-warp reduce-scan by a single warp to avoid extra synchronizations
const unsigned rtid_inter = ( threadIdx.y ^ BlockSizeMask ) << CudaTraits::WarpIndexShift ;
if ( rtid_inter < blockDim.y ) {
const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask );
// Steps 5..8 use strides of 32..256 slots; each is attempted only when the
// block size mask exceeds that stride.
if ( (1<<5) < BlockSizeMask ) { BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
if ( DoScan ) {
// n is the lowest set bit of rtid_inter among 32, 64, 128, 256 (0 if none).
int n = ( rtid_inter & 32 ) ? 32 : (
( rtid_inter & 64 ) ? 64 : (
( rtid_inter & 128 ) ? 128 : (
( rtid_inter & 256 ) ? 256 : 0 )));
// Disable the step when rtid_inter + n is not below blockDim.y.
if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ;
BLOCK_SCAN_STEP(tdata_inter,n,8)
BLOCK_SCAN_STEP(tdata_inter,n,7)
BLOCK_SCAN_STEP(tdata_inter,n,6)
BLOCK_SCAN_STEP(tdata_inter,n,5)
}
}
}
__syncthreads(); // Wait for inter-warp reduce-scan to complete
if ( DoScan ) {
// n is the lowest set bit of rtid_intra among 1, 2, 4, 8, 16 (0 if none).
int n = ( rtid_intra & 1 ) ? 1 : (
( rtid_intra & 2 ) ? 2 : (
( rtid_intra & 4 ) ? 4 : (
( rtid_intra & 8 ) ? 8 : (
( rtid_intra & 16 ) ? 16 : 0 ))));
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,0)
}
#undef BLOCK_SCAN_STEP
#undef BLOCK_REDUCE_STEP
}
//----------------------------------------------------------------------------
/**\brief Input value-per-thread starting at 'shared_data'.
* Reduction value at last thread's location.
*
* If 'DoScan' then write blocks' scan values and block-groups' scan values.
*
* Global reduce result is in the last threads' 'shared_data' location.
*
* \param functor      supplies value size, init, join and copy operations
* \param block_id     index of this block's slot in 'global_data'
* \param block_count  number of contributing blocks
* \param shared_data  per-thread values, one value per threadIdx.y
* \param global_data  scratch holding one value per block
* \param global_flags counter used to detect the last block to arrive
* \return true for every thread of the block that arrived last, false
*         for all threads of every other block.
*
* Requires blockDim.y to be a power of two; aborts otherwise.
*/
template< bool DoScan , class FunctorType , class ArgTag >
__device__
bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
const Cuda::size_type block_id ,
const Cuda::size_type block_count ,
Cuda::size_type * const shared_data ,
Cuda::size_type * const global_data ,
Cuda::size_type * const global_flags )
{
typedef Cuda::size_type size_type ;
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ;
typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
const unsigned BlockSizeMask = blockDim.y - 1 ;
const unsigned BlockSizeShift = power_of_two_if_valid( blockDim.y );
// Must have power of two thread count
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_single_inter_block_reduce_scan requires power-of-two blockDim"); }
// Size of one reduction value measured in size_type words.
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
word_count( ValueTraits::value_size( functor ) / sizeof(size_type) );
// Reduce the accumulation for the entire block.
cuda_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
{
// Write accumulation total to global scratch space.
// Accumulation total is the last thread's data.
size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
size_type * const global = global_data + word_count.value * block_id ;
// The words of the value are copied cooperatively, strided by blockDim.y.
for ( size_type i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i] ; }
}
// Contributing blocks note that their contribution has been completed via an atomic-increment flag
// If this block is not the last block to contribute to this group then the block is done.
// Only thread 0 performs the atomicInc; its predicate is non-zero unless
// the old counter value was block_count - 1.  __syncthreads_or shares that
// outcome with the whole block.
const bool is_last_block =
! __syncthreads_or( threadIdx.y ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) );
if ( is_last_block ) {
// [b,e) is this thread's share of the block_count global values.
const size_type b = ( long(block_count) * long(threadIdx.y) ) >> BlockSizeShift ;
const size_type e = ( long(block_count) * long( threadIdx.y + 1 ) ) >> BlockSizeShift ;
{
void * const shared_ptr = shared_data + word_count.value * threadIdx.y ;
// The reference is only needed for the side effect of init(); it is not
// read afterwards.
reference_type shared_value = ValueInit::init( functor , shared_ptr );
for ( size_type i = b ; i < e ; ++i ) {
ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i );
}
}
cuda_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
if ( DoScan ) {
// Each thread starts from the previous thread's slot; thread 0 uses the
// slot at index blockDim.y, which it initializes itself.
size_type * const shared_value = shared_data + word_count.value * ( threadIdx.y ? threadIdx.y - 1 : blockDim.y );
if ( ! threadIdx.y ) { ValueInit::init( functor , shared_value ); }
// Join previous inclusive scan value to each member
for ( size_type i = b ; i < e ; ++i ) {
size_type * const global_value = global_data + word_count.value * i ;
ValueJoin::join( functor , shared_value , global_value );
ValueOps ::copy( functor , global_value , shared_value );
}
}
}
return is_last_block ;
}
// Size in bytes required for inter block reduce or scan
template< bool DoScan , class FunctorType , class ArgTag >
inline
unsigned cuda_single_inter_block_reduce_scan_shmem( const FunctorType & functor , const unsigned BlockSize )
{
return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor );
}
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( __CUDACC__ ) */
#endif /* KOKKOS_CUDA_REDUCESCAN_HPP */

View File

@ -1,298 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_VECTORIZATION_HPP
#define KOKKOS_CUDA_VECTORIZATION_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <Kokkos_Cuda.hpp>
namespace Kokkos {
// Shuffle only makes sense on >= Kepler GPUs; it doesn't work on CPUs
// or other GPUs. We provide a generic definition (which is trivial
// and doesn't do what it claims to do) because we don't actually use
// this function unless we are on a suitable GPU, with a suitable
// Scalar type. (For example, in the mat-vec, the "ThreadsPerRow"
// internal parameter depends both on the ExecutionSpace and the Scalar type,
// and it controls whether shfl_down() gets called.)
namespace Impl {
// Staging buffer that views a Scalar as an array of 32-bit float words so it
// can be moved one word at a time.
//
// n is sizeof(Scalar)/4 using integer division, so only whole 4-byte words of
// the Scalar are copied.
template< typename Scalar >
struct shfl_union {
  enum {n = sizeof(Scalar)/4};
  float fval[n];

  // Reinterpret the stored words as a Scalar and return it by value.
  KOKKOS_INLINE_FUNCTION
  Scalar value() {
    Scalar * const as_scalar = (Scalar*) fval ;
    return *as_scalar ;
  }

  // Copy the words of a mutable Scalar into the buffer.
  KOKKOS_INLINE_FUNCTION
  void operator= (Scalar& value_) {
    float* const words = (float*) &value_;
    int w = 0;
    while( w < n ) {
      fval[w] = words[w];
      w++;
    }
  }

  // Copy the words of a const Scalar into the buffer.
  KOKKOS_INLINE_FUNCTION
  void operator= (const Scalar& value_) {
    float* const words = (float*) &value_;
    int w = 0;
    while( w < n ) {
      fval[w] = words[w];
      w++;
    }
  }
};
}
#ifdef __CUDA_ARCH__
#if (__CUDA_ARCH__ >= 300)
// shfl overloads: each one forwards val to the CUDA __shfl intrinsic with the
// given srcLane and width, and returns what the intrinsic hands back.

// int: passed straight to the intrinsic.
KOKKOS_INLINE_FUNCTION
int shfl(const int &val, const int& srcLane, const int& width ) {
  const int received = __shfl(val,srcLane,width);
  return received;
}

// float: passed straight to the intrinsic.
KOKKOS_INLINE_FUNCTION
float shfl(const float &val, const int& srcLane, const int& width ) {
  const float received = __shfl(val,srcLane,width);
  return received;
}

// Any 4-byte Scalar: the bits are reinterpreted as a float, shuffled, and
// reinterpreted back.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type& width
) {
  Scalar local_copy = val;
  float bits = *reinterpret_cast<float*>(&local_copy);
  bits = __shfl(bits,srcLane,width);
  return *reinterpret_cast<Scalar*>(&bits);
}

// double: split into low/high 32-bit halves, shuffle each half (low first),
// then reassemble.
KOKKOS_INLINE_FUNCTION
double shfl(const double &val, const int& srcLane, const int& width) {
  const int low_word  = __shfl(__double2loint(val),srcLane,width);
  const int high_word = __shfl(__double2hiint(val),srcLane,width);
  return __hiloint2double(high_word,low_word);
}

// Any 8-byte Scalar: treated as a double, moved as two 32-bit halves.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) {
  const double & as_double = *reinterpret_cast<const double*>(&val);
  const int low_word  = __shfl(__double2loint(as_double),srcLane,width);
  const int high_word = __shfl(__double2hiint(as_double),srcLane,width);
  const double rebuilt = __hiloint2double(high_word,low_word);
  return *(reinterpret_cast<const Scalar*>(&rebuilt));
}

// Scalars larger than 8 bytes: moved one 32-bit word at a time through
// Impl::shfl_union.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) > 8) ,int>::type& width) {
  Impl::shfl_union<Scalar> outgoing;
  Impl::shfl_union<Scalar> incoming;
  outgoing = val;
  for(int word = 0; word < outgoing.n; ++word) {
    incoming.fval[word] = __shfl(outgoing.fval[word],srcLane,width);
  }
  return incoming.value();
}
// shfl_down overloads: each one forwards val to the CUDA __shfl_down intrinsic
// with the given delta and width, and returns what the intrinsic hands back.

// int: passed straight to the intrinsic.
KOKKOS_INLINE_FUNCTION
int shfl_down(const int &val, const int& delta, const int& width) {
  const int received = __shfl_down(val,delta,width);
  return received;
}

// float: passed straight to the intrinsic.
KOKKOS_INLINE_FUNCTION
float shfl_down(const float &val, const int& delta, const int& width) {
  const float received = __shfl_down(val,delta,width);
  return received;
}

// Any 4-byte Scalar: the bits are reinterpreted as a float, shuffled, and
// reinterpreted back.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
  Scalar local_copy = val;
  float bits = *reinterpret_cast<float*>(&local_copy);
  bits = __shfl_down(bits,delta,width);
  return *reinterpret_cast<Scalar*>(&bits);
}

// double: split into low/high 32-bit halves, shuffle each half (low first),
// then reassemble.
KOKKOS_INLINE_FUNCTION
double shfl_down(const double &val, const int& delta, const int& width) {
  const int low_word  = __shfl_down(__double2loint(val),delta,width);
  const int high_word = __shfl_down(__double2hiint(val),delta,width);
  return __hiloint2double(high_word,low_word);
}

// Any 8-byte Scalar: treated as a double, moved as two 32-bit halves.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
  const double & as_double = *reinterpret_cast<const double*>(&val);
  const int low_word  = __shfl_down(__double2loint(as_double),delta,width);
  const int high_word = __shfl_down(__double2hiint(as_double),delta,width);
  const double rebuilt = __hiloint2double(high_word,low_word);
  return *(reinterpret_cast<const Scalar*>(&rebuilt));
}

// Scalars larger than 8 bytes: moved one 32-bit word at a time through
// Impl::shfl_union.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
  Impl::shfl_union<Scalar> outgoing;
  Impl::shfl_union<Scalar> incoming;
  outgoing = val;
  for(int word = 0; word < outgoing.n; ++word) {
    incoming.fval[word] = __shfl_down(outgoing.fval[word],delta,width);
  }
  return incoming.value();
}
// shfl_up overloads: each one forwards val to the CUDA __shfl_up intrinsic
// with the given delta and width, and returns what the intrinsic hands back.

// int: passed straight to the intrinsic.
KOKKOS_INLINE_FUNCTION
int shfl_up(const int &val, const int& delta, const int& width ) {
  const int received = __shfl_up(val,delta,width);
  return received;
}

// float: passed straight to the intrinsic.
KOKKOS_INLINE_FUNCTION
float shfl_up(const float &val, const int& delta, const int& width ) {
  const float received = __shfl_up(val,delta,width);
  return received;
}

// Any 4-byte Scalar: the bits are reinterpreted as a float, shuffled, and
// reinterpreted back.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
  Scalar local_copy = val;
  float bits = *reinterpret_cast<float*>(&local_copy);
  bits = __shfl_up(bits,delta,width);
  return *reinterpret_cast<Scalar*>(&bits);
}

// double: split into low/high 32-bit halves, shuffle each half (low first),
// then reassemble.
KOKKOS_INLINE_FUNCTION
double shfl_up(const double &val, const int& delta, const int& width ) {
  const int low_word  = __shfl_up(__double2loint(val),delta,width);
  const int high_word = __shfl_up(__double2hiint(val),delta,width);
  return __hiloint2double(high_word,low_word);
}

// Any 8-byte Scalar: treated as a double, moved as two 32-bit halves.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
  const double & as_double = *reinterpret_cast<const double*>(&val);
  const int low_word  = __shfl_up(__double2loint(as_double),delta,width);
  const int high_word = __shfl_up(__double2hiint(as_double),delta,width);
  const double rebuilt = __hiloint2double(high_word,low_word);
  return *(reinterpret_cast<const Scalar*>(&rebuilt));
}

// Scalars larger than 8 bytes: moved one 32-bit word at a time through
// Impl::shfl_union.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
  Impl::shfl_union<Scalar> outgoing;
  Impl::shfl_union<Scalar> incoming;
  outgoing = val;
  for(int word = 0; word < outgoing.n; ++word) {
    incoming.fval[word] = __shfl_up(outgoing.fval[word],delta,width);
  }
  return incoming.value();
}
#else
// Stand-in used when compiling device code for __CUDA_ARCH__ < 300.
// Aborts when width > 1; otherwise returns val unchanged.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const int& width) {
  const bool needs_real_shuffle = ( 1 < width );
  if( needs_real_shuffle ) {
    Kokkos::abort("Error: calling shfl from a device with CC<3.0.");
  }
  return val;
}
// Stand-in used when compiling device code for __CUDA_ARCH__ < 300.
// Aborts when width > 1; otherwise returns val unchanged.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const int& width) {
  const bool needs_real_shuffle = ( 1 < width );
  if( needs_real_shuffle ) {
    Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
  }
  return val;
}
// Stand-in used when compiling device code for __CUDA_ARCH__ < 300.
// Aborts when width > 1; otherwise returns val unchanged.
//
//   val    value that would have been shuffled; returned as-is
//   delta  unused by this stand-in
//   width  any value greater than 1 triggers the abort
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const int& width) {
  // The diagnostic names shfl_up so the failing call is reported correctly.
  if(width > 1) Kokkos::abort("Error: calling shfl_up from a device with CC<3.0.");
  return val;
}
#endif
#else
template<typename Scalar>
inline
Scalar shfl(const Scalar &val, const int& srcLane, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl from a device with CC<3.0.");
return val;
}
template<typename Scalar>
inline
Scalar shfl_down(const Scalar &val, const int& delta, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
return val;
}
template<typename Scalar>
inline
Scalar shfl_up(const Scalar &val, const int& delta, const int& width) {
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
return val;
}
#endif
}
#endif // KOKKOS_HAVE_CUDA
#endif

View File

@ -1,312 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_VIEW_HPP
#define KOKKOS_CUDA_VIEW_HPP
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#include <cstring>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_CudaSpace.hpp>
#include <Kokkos_View.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
// Specialization of AssertShapeBoundsAbort for CudaSpace.
// NOTE(review): the primary template is declared elsewhere; presumably it is
// invoked by View bounds checking -- confirm against the caller.
template<>
struct AssertShapeBoundsAbort< CudaSpace >
{
// All rank, extent and index arguments are unnamed and ignored; the only
// action is to abort with a fixed message.
KOKKOS_INLINE_FUNCTION
static void apply( const size_t /* rank */ ,
const size_t /* n0 */ , const size_t /* n1 */ ,
const size_t /* n2 */ , const size_t /* n3 */ ,
const size_t /* n4 */ , const size_t /* n5 */ ,
const size_t /* n6 */ , const size_t /* n7 */ ,
const size_t /* arg_rank */ ,
const size_t /* i0 */ , const size_t /* i1 */ ,
const size_t /* i2 */ , const size_t /* i3 */ ,
const size_t /* i4 */ , const size_t /* i5 */ ,
const size_t /* i6 */ , const size_t /* i7 */ )
{
Kokkos::abort("Kokkos::View array bounds violation");
}
};
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
// Via reinterpret_cast this can be used to support all scalar types of those sizes.
// Any other scalar type falls back to either normal reads out of global memory,
// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
template< typename ValueType
, class MemorySpace
, class AliasType =
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 4 ) , int ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 8 ) , ::int2 ,
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 16 ) , ::int4 , void
>::type
>::type
>::type
>
class CudaTextureFetch {
private:
cuda_texture_object_type m_obj ;
const ValueType * m_alloc_ptr ;
int m_offset ;
void attach( const ValueType * const arg_ptr, AllocationTracker const & tracker )
{
typedef char const * const byte;
m_alloc_ptr = reinterpret_cast<ValueType *>(tracker.alloc_ptr());
size_t byte_offset = reinterpret_cast<byte>(arg_ptr) - reinterpret_cast<byte>(m_alloc_ptr);
const bool ok_aligned = 0 == byte_offset % sizeof(ValueType);
const size_t count = tracker.alloc_size() / sizeof(ValueType);
const bool ok_contains = (m_alloc_ptr <= arg_ptr) && (arg_ptr < (m_alloc_ptr + count));
if (ok_aligned && ok_contains) {
if (tracker.attribute() == NULL ) {
MemorySpace::texture_object_attach(
tracker
, sizeof(ValueType)
, cudaCreateChannelDesc< AliasType >()
);
}
m_obj = dynamic_cast<TextureAttribute*>(tracker.attribute())->m_tex_obj;
m_offset = arg_ptr - m_alloc_ptr;
}
else if( !ok_contains ) {
throw_runtime_exception("Error: cannot attach a texture object to a tracker which does not bound the pointer.");
}
else {
throw_runtime_exception("Error: cannot attach a texture object to an incorrectly aligned pointer.");
}
}
public:
KOKKOS_INLINE_FUNCTION
CudaTextureFetch() : m_obj() , m_alloc_ptr() , m_offset() {}
KOKKOS_INLINE_FUNCTION
~CudaTextureFetch() {}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch( const CudaTextureFetch & rhs )
: m_obj( rhs.m_obj )
, m_alloc_ptr( rhs.m_alloc_ptr )
, m_offset( rhs.m_offset )
{}
KOKKOS_INLINE_FUNCTION
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
{
m_obj = rhs.m_obj ;
m_alloc_ptr = rhs.m_alloc_ptr ;
m_offset = rhs.m_offset ;
return *this ;
}
KOKKOS_INLINE_FUNCTION explicit
CudaTextureFetch( const ValueType * const arg_ptr, AllocationTracker const & tracker )
: m_obj( 0 ) , m_alloc_ptr(0) , m_offset(0)
{
#if defined( KOKKOS_USE_LDG_INTRINSIC )
m_alloc_ptr(arg_ptr);
#elif defined( __CUDACC__ ) && ! defined( __CUDA_ARCH__ )
if ( arg_ptr != NULL ) {
if ( tracker.is_valid() ) {
attach( arg_ptr, tracker );
}
else {
AllocationTracker found_tracker = AllocationTracker::find<typename MemorySpace::allocator>(arg_ptr);
if ( found_tracker.is_valid() ) {
attach( arg_ptr, found_tracker );
} else {
throw_runtime_exception("Error: cannot attach a texture object to an untracked pointer!");
}
}
}
#endif
}
KOKKOS_INLINE_FUNCTION
operator const ValueType * () const { return m_alloc_ptr + m_offset ; }
template< typename iType >
KOKKOS_INLINE_FUNCTION
ValueType operator[]( const iType & i ) const
{
#if defined( KOKKOS_USE_LDG_INTRINSIC ) && defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_alloc_ptr[i]));
return *(reinterpret_cast<ValueType*> (&v));
#elif defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
return *(reinterpret_cast<ValueType*> (&v));
#else
return m_alloc_ptr[ i + m_offset ];
#endif
}
};
// Partial specialization for AliasType == void: no texture alias type is
// available, so the handle is just a pointer and every read is a plain load.
template< typename ValueType, class MemorySpace >
class CudaTextureFetch< const ValueType, MemorySpace, void >
{
private:

  const ValueType * m_ptr ;

public:

  // Default handle holds a null pointer.
  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch() : m_ptr(0) {}

  KOKKOS_INLINE_FUNCTION
  ~CudaTextureFetch() {}

  // Wrap a const pointer; the tracker argument is ignored.
  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch( const ValueType * ptr, const AllocationTracker & ) : m_ptr(ptr) {}

  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch( const CudaTextureFetch & rhs ) : m_ptr(rhs.m_ptr) {}

  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) {
    m_ptr = rhs.m_ptr;
    return *this ;
  }

  // Wrap a non-const pointer; the tracker argument is ignored.
  explicit KOKKOS_INLINE_FUNCTION
  CudaTextureFetch( ValueType * const base_view_ptr, AllocationTracker const & /*tracker*/ )
    : m_ptr( base_view_ptr )
  {}

  // Re-point the handle at a different array.
  KOKKOS_INLINE_FUNCTION
  CudaTextureFetch & operator = (const ValueType* base_view_ptr) {
    m_ptr = base_view_ptr;
    return *this;
  }

  // Expose the wrapped pointer.
  KOKKOS_INLINE_FUNCTION
  operator const ValueType * () const { return m_ptr ; }

  // Return element i by value.
  template< typename iType >
  KOKKOS_INLINE_FUNCTION
  ValueType operator[]( const iType & i ) const
  {
    return *( m_ptr + i );
  }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
* if 'const' value type, CudaSpace and random access.
*
* The specialization is enabled only when all three conditions hold:
*   - the memory space is CudaSpace or CudaUVMSpace,
*   - value_type is the same type as const_value_type,
*   - memory_traits::RandomAccess is set.
*/
template< class ViewTraits >
class ViewDataHandle< ViewTraits ,
typename enable_if< ( is_same< typename ViewTraits::memory_space,CudaSpace>::value ||
is_same< typename ViewTraits::memory_space,CudaUVMSpace>::value )
&&
is_same<typename ViewTraits::const_value_type,typename ViewTraits::value_type>::value
&&
ViewTraits::memory_traits::RandomAccess
>::type >
{
public:
// Element access through this handle yields a value (see return_type), not a reference.
enum { ReturnTypeIsReference = false };
// Handle type that performs the reads.
typedef Impl::CudaTextureFetch< typename ViewTraits::value_type
, typename ViewTraits::memory_space> handle_type;
// Build a handle for the data pointer and its allocation tracker.
KOKKOS_INLINE_FUNCTION
static handle_type create_handle( typename ViewTraits::value_type * arg_data_ptr, AllocationTracker const & arg_tracker )
{
return handle_type(arg_data_ptr, arg_tracker);
}
// Type produced by element access.
typedef typename ViewTraits::value_type return_type;
};
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif // KOKKOS_HAVE_CUDA
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */

View File

@ -1,119 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_ABORT_HPP
#define KOKKOS_CUDA_ABORT_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include "Kokkos_Macros.hpp"
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
#include <cuda.h>
#if ! defined( CUDA_VERSION ) || ( CUDA_VERSION < 4010 )
#error "Cuda version 4.1 or greater required"
#endif
#if ( __CUDA_ARCH__ < 200 )
#error "Cuda device capability 2.0 or greater required"
#endif
extern "C" {
/* Cuda runtime function, declared in <crt/device_runtime.h>
* Requires capability 2.x or better.
* Re-declared here so that cuda_abort() can call it directly.
*/
extern __device__ void __assertfail(
const void *message,
const void *file,
unsigned int line,
const void *function,
size_t charsize);
}
namespace Kokkos {
namespace Impl {
// Device-side abort: passes `message` to __assertfail with empty file and
// function strings, line 0 and a character size of sizeof(char).
// When __APPLE__ is defined the body is compiled out and this function does nothing.
__device__ inline
void cuda_abort( const char * const message )
{
#ifndef __APPLE__
const char empty[] = "" ;
__assertfail( (const void *) message ,
(const void *) empty ,
(unsigned int) 0 ,
(const void *) empty ,
sizeof(char) );
#endif
}
} // namespace Impl
} // namespace Kokkos
#else
namespace Kokkos {
namespace Impl {
// Stub used when not compiling CUDA device code: ignores the message and does nothing.
KOKKOS_INLINE_FUNCTION
void cuda_abort( const char * const ) {}
}
}
#endif /* #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
namespace Kokkos {
// Kokkos::abort for the case where the active execution memory space is CUDA:
// forwards the message to Kokkos::Impl::cuda_abort.
__device__ inline
void abort( const char * const message ) { Kokkos::Impl::cuda_abort(message); }
}
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,285 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Atomic.hpp
/// \brief Atomic functions
///
/// This header file defines prototypes for the following atomic functions:
/// - exchange
/// - compare and exchange
/// - add
///
/// Supported types include:
/// - signed and unsigned 4 and 8 byte integers
/// - float
/// - double
///
/// They are implemented through GCC compatible intrinsics, OpenMP
/// directives and native CUDA intrinsics.
///
/// Including this header file requires one of the following
/// compilers:
/// - NVCC (for CUDA device code only)
/// - GCC (for host code only)
/// - Intel (for host code only)
/// - A compiler that supports OpenMP 3.1 (for host code only)
#ifndef KOKKOS_ATOMIC_HPP
#define KOKKOS_ATOMIC_HPP
#include <Kokkos_Macros.hpp>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_Traits.hpp>
//----------------------------------------------------------------------------
// Select the atomic implementation by defining a KOKKOS_ATOMICS_USE_* macro:
//   - Windows builds always use the Windows implementation.
//   - CUDA device compilation (with KOKKOS_HAVE_CUDA) uses CUDA atomics.
//   - Otherwise a pre-selected GCC / INTEL / OMP31 macro is kept, or one is
//     chosen from the detected compiler; an unsupported compiler is an error.
#if defined(_WIN32)
#define KOKKOS_ATOMICS_USE_WINDOWS
#else
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
// Compiling NVIDIA device code, must use Cuda atomics:
#define KOKKOS_ATOMICS_USE_CUDA
#elif ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
! defined( KOKKOS_ATOMICS_USE_OMP31 )
// Compiling for non-Cuda atomic implementation has not been pre-selected.
// Choose the best implementation for the detected compiler.
// Preference: GCC, INTEL, OMP31
#if defined( KOKKOS_COMPILER_GNU ) || \
defined( KOKKOS_COMPILER_CLANG ) || \
( defined ( KOKKOS_COMPILER_NVCC ) && defined ( __GNUC__ ) )
#define KOKKOS_ATOMICS_USE_GCC
#elif defined( KOKKOS_COMPILER_INTEL ) || \
defined( KOKKOS_COMPILER_CRAYC )
#define KOKKOS_ATOMICS_USE_INTEL
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
#define KOKKOS_ATOMICS_USE_OMP31
#else
#error "KOKKOS_ATOMICS_USE : Unsupported compiler"
#endif
#endif /* Not pre-selected atomic implementation */
#endif /* Not _WIN32 */
//----------------------------------------------------------------------------
// Forward declaration of functions supporting arbitrary sized atomics
// This is necessary since Kokkos_Atomic.hpp is internally included very early
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
#ifdef KOKKOS_HAVE_CUDA
namespace Kokkos {
namespace Impl {
/// \brief Acquire a lock for the address
///
/// This function tries to acquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully acquired the
/// function returns true. Otherwise it returns false.
__device__ inline
bool lock_address_cuda_space(void* ptr);
/// \brief Release lock for the address
///
/// This function releases the lock for the hash value derived
/// from the provided ptr. This function should only be called
/// after previously successfully acquiring a lock with
/// lock_address_cuda_space.
__device__ inline
void unlock_address_cuda_space(void* ptr);
}
}
#endif
namespace Kokkos {
// Forward declarations only; no definitions appear in this header.
// NOTE(review): the definitions presumably come from one of the impl headers
// included further down -- confirm which one.

// Atomic add of src to *dest
template <typename T>
KOKKOS_INLINE_FUNCTION
void atomic_add(volatile T * const dest, const T src);
// Atomic increment
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_increment(volatile T* a);
// Atomic decrement
template<typename T>
KOKKOS_INLINE_FUNCTION
void atomic_decrement(volatile T* a);
}
#if ! defined(_WIN32)
#include<impl/Kokkos_Atomic_Assembly_X86.hpp>
#endif
namespace Kokkos {
/// \brief Name of the atomic implementation selected at compile time.
///
/// \return A string literal spelling the first KOKKOS_ATOMICS_USE_* macro
///         that is defined, tested in the order CUDA, GCC, INTEL, OMP31,
///         WINDOWS; "KOKKOS_ATOMICS_USE_UNKNOWN" if none of them is defined.
inline
const char * atomic_query_version()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
  return "KOKKOS_ATOMICS_USE_CUDA" ;
#elif defined( KOKKOS_ATOMICS_USE_GCC )
  return "KOKKOS_ATOMICS_USE_GCC" ;
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
  return "KOKKOS_ATOMICS_USE_INTEL" ;
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
  return "KOKKOS_ATOMICS_USE_OMP31" ;
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
  return "KOKKOS_ATOMICS_USE_WINDOWS";
#else
  // Without this branch control would fall off the end of a non-void
  // function (undefined behavior) whenever no selection macro is visible.
  return "KOKKOS_ATOMICS_USE_UNKNOWN" ;
#endif
}
} // namespace Kokkos
#ifdef _WIN32
#include "impl/Kokkos_Atomic_Windows.hpp"
#else
//#include "impl/Kokkos_Atomic_Assembly_X86.hpp"
//----------------------------------------------------------------------------
// Atomic exchange
//
// template< typename T >
// T atomic_exchange( volatile T* const dest , const T val )
// { T tmp = *dest ; *dest = val ; return tmp ; }
#include "impl/Kokkos_Atomic_Exchange.hpp"
//----------------------------------------------------------------------------
// Atomic compare-and-exchange
//
// template<class T>
// bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
// { bool equal = compare == *dest ; if ( equal ) { *dest = val ; } return equal ; }
#include "impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and add
//
// template<class T>
// T atomic_fetch_add(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest += val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_Add.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and sub
//
// template<class T>
// T atomic_fetch_sub(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest -= val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and or
//
// template<class T>
// T atomic_fetch_or(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest = tmp | val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_Or.hpp"
//----------------------------------------------------------------------------
// Atomic fetch and and
//
// template<class T>
// T atomic_fetch_and(volatile T* const dest, const T val)
// { T tmp = *dest ; *dest = tmp & val ; return tmp ; }
#include "impl/Kokkos_Atomic_Fetch_And.hpp"
#endif /*Not _WIN32*/
//----------------------------------------------------------------------------
// Memory fence
//
// All loads and stores from this thread will be globally consistent before continuing
//
// void memory_fence() {...};
#include "impl/Kokkos_Memory_Fence.hpp"
//----------------------------------------------------------------------------
// Provide volatile_load and safe_load
//
// T volatile_load(T const volatile * const ptr);
//
// T const& safe_load(T const * const ptr);
// XEON PHI
// T safe_load(T const * const ptr);
#include "impl/Kokkos_Volatile_Load.hpp"
#ifndef _WIN32
#include "impl/Kokkos_Atomic_Generic.hpp"
#endif
//----------------------------------------------------------------------------
// This atomic-style macro should be an inlined function, not a macro
//
// With the GNU compiler (and not PGI) the macros expand to __builtin_prefetch:
// the second argument selects read (0) or write (1) access and the third
// argument (0) requests no temporal locality.  With any other compiler they
// expand to a no-op expression.
#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__)
#define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0)
#define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0)
#else
#define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) ((void)0)
#define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) ((void)0)
#endif
//----------------------------------------------------------------------------
#endif /* KOKKOS_ATOMIC_HPP */

View File

@ -1,228 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CORE_HPP
#define KOKKOS_CORE_HPP
//----------------------------------------------------------------------------
// Include the execution space header files for the enabled execution spaces.
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <Kokkos_Cuda.hpp>
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#include <Kokkos_OpenMP.hpp>
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#include <Kokkos_Serial.hpp>
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#include <Kokkos_Threads.hpp>
#endif
#include <Kokkos_Pair.hpp>
#include <Kokkos_View.hpp>
#include <Kokkos_Vectorization.hpp>
#include <Kokkos_Atomic.hpp>
#include <Kokkos_hwloc.hpp>
#include <iostream>
//----------------------------------------------------------------------------
namespace Kokkos {
/// Bundle of settings accepted by Kokkos::initialize( const InitArguments & ).
/// Every field is set to -1 by the default constructor
/// (presumably meaning "not specified" -- confirm against the implementation).
struct InitArguments {
  int num_threads;
  int num_numa;
  int device_id;

  InitArguments()
    : num_threads( -1 )
    , num_numa( -1 )
    , device_id( -1 )
  {}
};
/** \brief Initialize Kokkos from an argument count and argument array.
 *
 *  NOTE(review): narg is taken by non-const reference, so the implementation
 *  presumably may modify the argument count -- confirm against the definition.
 */
void initialize(int& narg, char* arg[]);
/** \brief Initialize Kokkos from an InitArguments object.
 *
 *  A default-constructed InitArguments is used when no argument is given.
 */
void initialize(const InitArguments& args = InitArguments());
/** \brief Finalize the spaces that were initialized via Kokkos::initialize */
void finalize();
/** \brief Finalize all known execution spaces */
void finalize_all();
/** \brief Global fence.
 *
 *  NOTE(review): presumably waits for outstanding work on the enabled
 *  execution spaces -- confirm against the definition.
 */
void fence();
}
#ifdef KOKKOS_HAVE_CXX11
namespace Kokkos {
namespace Impl {
// Should only be used by kokkos_malloc and kokkos_free.
// Thin static forwarders to the reference-count operations of an
// AllocationTracker.
struct MallocHelper
{
  // Forwards to tracker.increment_ref_count().
  static void increment_ref_count( AllocationTracker const & tracker )
  { tracker.increment_ref_count(); }

  // Forwards to tracker.decrement_ref_count().
  static void decrement_ref_count( AllocationTracker const & tracker )
  { tracker.decrement_ref_count(); }
};
} // namespace Impl
/* Allocate memory from a memory space.
* The allocation is tracked in Kokkos memory tracking system, so
* leaked memory can be identified.
*/
template< class Arg = DefaultExecutionSpace>
void* kokkos_malloc(const std::string label, size_t count) {
typedef typename Arg::memory_space MemorySpace;
Impl::AllocationTracker tracker = MemorySpace::allocate_and_track(label,count);;
Impl::MallocHelper::increment_ref_count( tracker );
return tracker.alloc_ptr();
}
/* Convenience overload: allocate with the fixed label "DefaultLabel". */
template< class Arg = DefaultExecutionSpace>
void* kokkos_malloc(const size_t& count) {
  void * const allocation = kokkos_malloc<Arg>( "DefaultLabel" , count );
  return allocation;
}
/* Free memory from a memory space.
*/
template< class Arg = DefaultExecutionSpace>
void kokkos_free(const void* ptr) {
typedef typename Arg::memory_space MemorySpace;
typedef typename MemorySpace::allocator allocator;
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(ptr);
if (tracker.is_valid()) {
Impl::MallocHelper::decrement_ref_count( tracker );
}
}
template< class Arg = DefaultExecutionSpace>
const void* kokkos_realloc(const void* old_ptr, size_t size) {
typedef typename Arg::memory_space MemorySpace;
typedef typename MemorySpace::allocator allocator;
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(old_ptr);
tracker.reallocate(size);
return tracker.alloc_ptr();
}
} // namespace Kokkos
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
inline
void * kokkos_malloc( const size_t arg_alloc_size )
{
typedef typename Space::memory_space MemorySpace ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
RecordHost * const r = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
RecordBase::increment( r );
return r->data();
}
/** \brief Release memory obtained from Kokkos::Experimental::kokkos_malloc
 *         or kokkos_realloc for the same Space.
 *
 *  Looks up the shared allocation record that owns arg_alloc and decrements
 *  it once, balancing the increment performed at allocation time.
 *
 *  A null arg_alloc is ignored, so the call is a no-op in that case instead
 *  of handing a null pointer to get_record(). This mirrors the non-experimental
 *  kokkos_free, which does nothing when no valid tracker is found.
 *
 *  \param arg_alloc  pointer previously returned by kokkos_malloc /
 *                    kokkos_realloc, or null.
 */
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
inline
void kokkos_free( void * arg_alloc )
{
  typedef typename Space::memory_space MemorySpace ;
  typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
  typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;

  // Nothing was allocated for a null pointer, so there is no record to release.
  if ( ! arg_alloc ) return ;

  RecordHost * const r = RecordHost::get_record( arg_alloc );

  RecordBase::decrement( r );
}
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
inline
void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size )
{
typedef typename Space::memory_space MemorySpace ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
RecordHost * const r_old = RecordHost::get_record( arg_alloc );
RecordHost * const r_new = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
Kokkos::Impl::DeepCopy<MemorySpace,MemorySpace>( r_new->data() , r_old->data()
, std::min( r_old->size() , r_new->size() ) );
RecordBase::increment( r_new );
RecordBase::decrement( r_old );
return r_new->data();
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -1,170 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CORE_FWD_HPP
#define KOKKOS_CORE_FWD_HPP
//----------------------------------------------------------------------------
// Kokkos_Macros.hpp does introspection on configuration options
// and compiler environment then sets a collection of #define macros.
#include <Kokkos_Macros.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Forward declarations for class inter-relationships
// Forward declarations of the memory space and execution space classes.
// Each back-end class is only declared when its KOKKOS_HAVE_* macro is defined.
namespace Kokkos {
class HostSpace ; ///< Memory space for main process and CPU execution spaces
#if defined( KOKKOS_HAVE_SERIAL )
class Serial ; ///< Execution space main process on CPU
#endif // defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_HAVE_PTHREAD )
class Threads ; ///< Execution space with pthreads back-end
#endif
#if defined( KOKKOS_HAVE_OPENMP )
class OpenMP ; ///< OpenMP execution space
#endif
#if defined( KOKKOS_HAVE_CUDA )
class CudaSpace ; ///< Memory space on Cuda GPU
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
class Cuda ; ///< Execution space for Cuda GPU
#endif
/// Class template parameterized on an execution space type and a memory
/// space type; only declared here, not defined in this header.
template<class ExecutionSpace, class MemorySpace>
struct Device;
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Set the default execution space.
/// Define Kokkos::DefaultExecutionSpace as per configuration option
/// or chosen from the enabled execution spaces in the following order:
/// Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Serial
namespace Kokkos {
// DefaultExecutionSpace: chosen solely by which
// KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* macro is defined, tested in the order
// CUDA, OPENMP, THREADS, SERIAL. Compilation fails if none is defined.
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
typedef Cuda DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
// DefaultHostExecutionSpace: never Cuda. If the default device type is one of
// OPENMP / THREADS / SERIAL it is used directly; otherwise the first enabled
// host back-end is taken in the order OpenMP, Threads, Serial
// (KOKKOS_HAVE_OPENMP, KOKKOS_HAVE_PTHREAD, KOKKOS_HAVE_SERIAL).
// Compilation fails if no host back-end is available.
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_PTHREAD )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Detect the active execution space and define its memory space.
// This is used to verify whether a running kernel can access
// a given memory space.
namespace Kokkos {
namespace Impl {
// ActiveExecutionMemorySpace names the memory space of the code currently
// being compiled:
//   - Kokkos::CudaSpace when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA and
//     KOKKOS_HAVE_CUDA are both defined,
//   - Kokkos::HostSpace when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is defined,
//   - void otherwise.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
#else
typedef void ActiveExecutionMemorySpace ;
#endif
/** \brief Compile-time answer to "may code running in one space touch
 *         memory of another space?".
 *
 *  Primary template: access is not permitted (value == 0) and no verify()
 *  functions are provided. Specializations grant or refine access.
 */
template< class ExecMemSpace , class DataMemSpace >
struct VerifyExecutionCanAccessMemorySpace
{
  enum { value = 0 };
};
/** Execution and data in the same space: access is allowed (value == 1)
 *  and both verify() overloads do nothing.
 */
template< class SameSpace >
struct VerifyExecutionCanAccessMemorySpace< SameSpace , SameSpace >
{
  enum { value = 1 };

  KOKKOS_INLINE_FUNCTION static void verify() {}
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
} // namespace Impl
} // namespace Kokkos
// Expands to a call of
// VerifyExecutionCanAccessMemorySpace< ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR ),
// i.e. checks that the currently active execution memory space may access
// memory of DATA_SPACE, passing the pointer along to verify().
#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR )
// Same check as KOKKOS_RESTRICT_EXECUTION_TO_DATA but calls the verify()
// overload that takes no pointer.
#define KOKKOS_RESTRICT_EXECUTION_TO_( DATA_SPACE ) \
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify()
namespace Kokkos {
/// Declaration of the global Kokkos::fence(); the definition is not in this header.
void fence();
}
#endif /* #ifndef KOKKOS_CORE_FWD_HPP */

View File

@ -1,268 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDA_HPP
#define KOKKOS_CUDA_HPP
#include <Kokkos_Core_fwd.hpp>
// If CUDA execution space is enabled then use this header file.
#if defined( KOKKOS_HAVE_CUDA )
#include <iosfwd>
#include <vector>
#include <Kokkos_CudaSpace.hpp>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/// Forward declaration only; the class is not defined in this part of the header.
class CudaExec ;
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/// \class Cuda
/// \brief Kokkos Execution Space that uses CUDA to run on GPUs.
///
/// An "execution space" represents a parallel execution model. It tells Kokkos
/// how to parallelize the execution of kernels in a parallel_for or
/// parallel_reduce. For example, the Threads execution space uses Pthreads or
/// C++11 threads on a CPU, the OpenMP execution space uses the OpenMP language
/// extensions, and the Serial execution space executes "parallel" kernels
/// sequentially. The Cuda execution space uses NVIDIA's CUDA programming
/// model to execute kernels in parallel on GPUs.
class Cuda {
public:
//! \name Type declarations that all Kokkos execution spaces must provide.
//@{
//! Tag this class as a kokkos execution space
typedef Cuda execution_space ;
#if defined( KOKKOS_USE_CUDA_UVM )
//! This execution space's preferred memory space (UVM build: CudaUVMSpace).
typedef CudaUVMSpace memory_space ;
#else
//! This execution space's preferred memory space.
typedef CudaSpace memory_space ;
#endif
//! This execution space's preferred device_type.
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! The size_type best suited for this execution space.
typedef memory_space::size_type size_type ;
//! This execution space's preferred array layout.
typedef LayoutLeft array_layout ;
//! Scratch memory space type associated with this execution space.
typedef ScratchMemorySpace< Cuda > scratch_memory_space ;
//@}
//--------------------------------------------------
//! \name Functions that all Kokkos devices must implement.
//@{
/// \brief True if and only if this method is being called in a
/// thread-parallel function.
///
/// Decided at compile time: the result is true when __CUDA_ARCH__ is
/// defined and false otherwise. The value is returned as an int.
KOKKOS_INLINE_FUNCTION static int in_parallel() {
#if defined( __CUDA_ARCH__ )
return true;
#else
return false;
#endif
}
/** \brief Set the device in a "sleep" state.
*
* This function sets the device in a "sleep" state in which it is
* not ready for work. This may consume less resources than if the
* device were in an "awake" state, but it may also take time to
* bring the device from a sleep state to be ready for work.
*
* \return True if the device is in the "sleep" state, else false if
* the device is actively working and could not enter the "sleep"
* state.
*/
static bool sleep();
/// \brief Wake the device from the 'sleep' state so it is ready for work.
///
/// \return True if the device is in the "ready" state, else "false"
/// if the device is actively working (which also means that it's
/// awake).
static bool wake();
/// \brief Wait until all dispatched functors complete.
///
/// The parallel_for or parallel_reduce dispatch of a functor may
/// return asynchronously, before the functor completes. This
/// method does not return until all dispatched functors on this
/// device have completed.
static void fence();
//! Free any resources being consumed by the device.
static void finalize();
//! Whether the device has been initialized (reported as an int).
static int is_initialized();
//! Print configuration information to the given output stream.
//! \param detail defaults to false; NOTE(review): presumably enables more verbose output -- confirm.
static void print_configuration( std::ostream & , const bool detail = false );
//@}
//--------------------------------------------------
//! \name Cuda space instances
// The destructor is empty; copy/move construction and assignment are defaulted.
~Cuda() {}
Cuda();
// NOTE(review): the meaning of instance_id is not visible in this header;
// presumably it selects one of the instances created by initialize() -- confirm.
explicit Cuda( const int instance_id );
Cuda( const Cuda & ) = default ;
Cuda( Cuda && ) = default ;
Cuda & operator = ( const Cuda & ) = default ;
Cuda & operator = ( Cuda && ) = default ;
//--------------------------------------------------------------------------
//! \name Device-specific functions
//@{
//! Wrapper around the id of the CUDA device to use; defaults to device 0.
struct SelectDevice {
int cuda_device_id ;
SelectDevice() : cuda_device_id(0) {}
explicit SelectDevice( int id ) : cuda_device_id( id ) {}
};
//! Initialize, telling the CUDA run-time library which device to use.
//! Defaults: device 0 (default SelectDevice) and num_instances = 1.
static void initialize( const SelectDevice = SelectDevice()
, const size_t num_instances = 1 );
/// \brief Cuda device architecture of the selected device.
///
/// This matches the __CUDA_ARCH__ specification.
static size_type device_arch();
//! Query device count.
static size_type detect_device_count();
/** \brief Detect the available devices and their architecture
* as defined by the __CUDA_ARCH__ specification.
*/
static std::vector<unsigned> detect_device_arch();
//! CUDA stream stored in this instance.
cudaStream_t cuda_stream() const { return m_stream ; }
//! CUDA device id stored in this instance.
int cuda_device() const { return m_device ; }
//@}
//--------------------------------------------------------------------------
private:
cudaStream_t m_stream ; ///< Returned by cuda_stream()
int m_device ; ///< Returned by cuda_device()
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/** Running in CudaSpace accessing the Cuda scratch memory space: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace
                                          , Kokkos::Cuda::scratch_memory_space >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION static void verify() {}
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
/** Running in HostSpace attempting to access the Cuda scratch memory space:
 *  not allowed; both verify() overloads report through CudaSpace::access_error.
 */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace
                                          , Kokkos::Cuda::scratch_memory_space >
{
  enum { value = false };

  static inline void verify()
  { CudaSpace::access_error(); }

  static inline void verify( const void * p )
  { CudaSpace::access_error(p); }
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#include <Cuda/Kokkos_CudaExec.hpp>
#include <Cuda/Kokkos_Cuda_View.hpp>
#include <KokkosExp_View.hpp>
#include <Cuda/KokkosExp_Cuda_View.hpp>
#include <Cuda/Kokkos_Cuda_Parallel.hpp>
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_HPP */

View File

@ -1,656 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <iosfwd>
#include <typeinfo>
#include <string>
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Cuda on-device memory management */
class CudaSpace {
public:
//! Tag this class as a kokkos memory space
typedef CudaSpace memory_space ;
//! Execution space paired with this memory space.
typedef Kokkos::Cuda execution_space ;
//! Device type combining the execution space and this memory space.
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! Size type used by this memory space.
typedef unsigned int size_type ;
//! Allocator type used for this memory space.
typedef Impl::CudaMallocAllocator allocator;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/*--------------------------------*/
/** \brief Cuda specific function to attach a texture object to an allocation.
* Output the texture object, base pointer, and offset from the input pointer.
*
* Only declared when compiling with __CUDACC__ defined.
* NOTE(review): the function returns void and takes the tracker by const
* reference, so the "outputs" above are presumably stored through the
* tracker -- confirm against the definition.
*/
#if defined( __CUDACC__ )
static void texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
);
#endif
/*--------------------------------*/
// Default constructor is declared here and defined elsewhere;
// copy construction, copy assignment and destruction are defaulted.
CudaSpace();
CudaSpace( const CudaSpace & rhs ) = default ;
CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
~CudaSpace() = default ;
/**\brief Allocate memory in the cuda space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate memory in the cuda space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
/** \brief Error reporting for HostSpace attempt to access CudaSpace */
static void access_error();
/** \brief Same as access_error(), additionally given a pointer
* (presumably the offending address -- confirm). */
static void access_error( const void * const );
private:
int m_device ; ///< Which Cuda device
// friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
};
namespace Impl {
/// \brief Initialize lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
void init_lock_array_cuda_space();
/// \brief Retrieve the pointer to the lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function retrieves the lock array pointer.
/// If the array is not yet allocated it will do so.
///
/// \param deallocate defaults to false. NOTE(review): presumably requests
/// release of the lock array when true -- confirm against the definition.
int* lock_array_cuda_space_ptr(bool deallocate = false);
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Cuda memory that is accessible to Host execution space
* through Cuda's unified virtual memory (UVM) runtime.
*/
class CudaUVMSpace {
public:
//! Tag this class as a kokkos memory space
typedef CudaUVMSpace memory_space ;
//! Execution space paired with this memory space.
typedef Cuda execution_space ;
//! Device type combining the execution space and this memory space.
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! Size type used by this memory space.
typedef unsigned int size_type ;
/** \brief If UVM capability is available */
static bool available();
//! Allocator type used for this memory space.
typedef Impl::CudaUVMAllocator allocator;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/** \brief Cuda specific function to attach a texture object to an allocation.
* Output the texture object, base pointer, and offset from the input pointer.
*
* Only declared when compiling with __CUDACC__ defined.
*/
#if defined( __CUDACC__ )
static void texture_object_attach( Impl::AllocationTracker const & tracker
, unsigned type_size
, ::cudaChannelFormatDesc const & desc
);
#endif
/*--------------------------------*/
// Default constructor is declared here and defined elsewhere;
// copy construction, copy assignment and destruction are defaulted.
CudaUVMSpace();
CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
~CudaUVMSpace() = default ;
/**\brief Allocate memory in the cuda UVM space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate memory in the cuda UVM space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
private:
int m_device ; ///< Which Cuda device
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Host memory that is accessible to Cuda execution space
* through Cuda's host-pinned memory allocation.
*/
class CudaHostPinnedSpace {
public:
/** \brief Memory is in HostSpace so use the HostSpace::execution_space */
typedef HostSpace::execution_space execution_space ;
//! Tag this class as a kokkos memory space
typedef CudaHostPinnedSpace memory_space ;
//! Device type combining the execution space and this memory space.
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! Size type used by this memory space.
typedef unsigned int size_type ;
//! Allocator type used for this memory space.
typedef Impl::CudaHostAllocator allocator ;
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/*--------------------------------*/
// Default constructor is declared here and defined elsewhere;
// copy construction, copy assignment and destruction are defaulted.
CudaHostPinnedSpace();
CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
~CudaHostPinnedSpace() = default ;
/**\brief Allocate memory in this (host-pinned) space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate memory in this (host-pinned) space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/*--------------------------------*/
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {

//----------------------------------------------------------------------------
// Base cases: copies between CudaSpace and HostSpace.
// Each provides a plain constructor and one taking a Cuda instance;
// both are only declared here.

template<> struct DeepCopy< CudaSpace , CudaSpace >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

template<> struct DeepCopy< CudaSpace , HostSpace >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

template<> struct DeepCopy< HostSpace , CudaSpace >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

//----------------------------------------------------------------------------
// Forwarding cases. Each constructor builds a temporary of the DeepCopy
// specialization named in its body and discards it.
// On the CudaSpace / CudaUVMSpace side, CudaUVMSpace is handled as CudaSpace
// and CudaHostPinnedSpace as HostSpace; on the HostSpace / CudaHostPinnedSpace
// side, CudaUVMSpace is handled as CudaSpace and CudaHostPinnedSpace as HostSpace.

// Destination: CudaSpace

template<> struct DeepCopy< CudaSpace , CudaUVMSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< CudaSpace , CudaSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< CudaSpace , CudaHostPinnedSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< CudaSpace , HostSpace >( dst , src , n ) ); }
};

// Destination: CudaUVMSpace

template<> struct DeepCopy< CudaUVMSpace , CudaSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< CudaSpace , CudaSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< CudaUVMSpace , CudaUVMSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< CudaSpace , CudaSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< CudaSpace , HostSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< CudaUVMSpace , HostSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< CudaSpace , HostSpace >( dst , src , n ) ); }
};

// Destination: CudaHostPinnedSpace

template<> struct DeepCopy< CudaHostPinnedSpace , CudaSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< HostSpace , CudaSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< HostSpace , CudaSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< HostSpace , HostSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< CudaHostPinnedSpace , HostSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< HostSpace , HostSpace >( dst , src , n ) ); }
};

// Destination: HostSpace

template<> struct DeepCopy< HostSpace , CudaUVMSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< HostSpace , CudaSpace >( dst , src , n ) ); }
};

template<> struct DeepCopy< HostSpace , CudaHostPinnedSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { static_cast<void>( DeepCopy< HostSpace , HostSpace >( dst , src , n ) ); }
};

} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** Running in CudaSpace attempting to access HostSpace: error.
 *  Both verify() overloads call Kokkos::abort with the same message.
 */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
{
  enum { value = false };

  KOKKOS_INLINE_FUNCTION static void verify()
  {
    Kokkos::abort("Cuda code attempted to access HostSpace memory");
  }

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
  {
    Kokkos::abort("Cuda code attempted to access HostSpace memory");
  }
};
/** Running in CudaSpace accessing CudaUVMSpace: ok (verify() is a no-op) */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION static void verify() {}
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
/** Execution in CudaSpace accessing CudaHostPinnedSpace memory: permitted.
 *
 *  The compile-time flag is true and both run-time checks do nothing.
 */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION static void verify()
  {}

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
  {}
};
/** Execution in CudaSpace accessing any other, unlisted space: reported as an error.
 *
 *  The enable_if in the first argument removes this partial specialization
 *  when OtherSpace is Kokkos::CudaSpace itself.
 *  The compile-time flag is false and both run-time checks call Kokkos::abort.
 */
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
  typename enable_if< ! is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
  OtherSpace >
{
  enum { value = false };

  // Check without an address.
  KOKKOS_INLINE_FUNCTION static void verify()
  {
    Kokkos::abort("Cuda code attempted to access unknown Space memory");
  }

  // Check for a specific address; the pointer itself is not inspected.
  KOKKOS_INLINE_FUNCTION static void verify( const void * )
  {
    Kokkos::abort("Cuda code attempted to access unknown Space memory");
  }
};
//----------------------------------------------------------------------------
/** Execution in HostSpace accessing CudaSpace memory: not permitted.
 *
 *  The compile-time flag is false; the run-time checks hand the report to
 *  CudaSpace::access_error, passing the address along when one is given.
 */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
{
  enum { value = false };

  inline static void verify()
  {
    CudaSpace::access_error();
  }

  inline static void verify( const void * ptr )
  {
    CudaSpace::access_error( ptr );
  }
};
/** Execution in HostSpace accessing CudaUVMSpace memory: permitted.
 *
 *  The compile-time flag is true and both run-time checks do nothing.
 */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };

  inline static void verify()
  {}

  inline static void verify( const void * )
  {}
};
/** Execution in HostSpace accessing CudaHostPinnedSpace memory: permitted.
 *
 *  The compile-time flag is true and both run-time checks do nothing.
 */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION static void verify()
  {}

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
  {}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** Allocation record specialization for Kokkos::CudaSpace.
 *
 *  Derives from the untyped record base and adds a Kokkos::CudaSpace member
 *  plus a texture object handle that is created on first request.
 *  Copying is disabled; construction and destruction are protected.
 */
template<>
class SharedAllocationRecord< Kokkos::CudaSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  // The CudaUVMSpace record calls the private static attach_texture_object below.
  friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;

  using RecordBase = SharedAllocationRecord< void , void > ;

  // Not copyable.
  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  // Default deallocation callback handed to the record base by the constructor.
  static void deallocate( RecordBase * );

  // Builds a texture object for an allocation; defined outside this header section.
  static ::cudaTextureObject_t
  attach_texture_object( const unsigned sizeof_alias
                       , void * const   alloc_ptr
                       , const size_t   alloc_size );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t   m_tex_obj ;
  const Kokkos::CudaSpace m_space ;

protected:

  ~SharedAllocationRecord();

  SharedAllocationRecord()
    : RecordBase()
    , m_tex_obj( 0 )
    , m_space()
    {}

  SharedAllocationRecord( const Kokkos::CudaSpace        & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
                                          , const std::string       & arg_label
                                          , const size_t              arg_alloc_size
                                          );

  /** Return the texture object for this record, creating it on the first call.
   *
   *  AliasType must be int, ::int2, or ::int4 (checked at compile time).
   *  A handle value of 0 means "not yet attached".
   */
  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
  {
    static_assert( ( std::is_same< AliasType , int >::value ||
                     std::is_same< AliasType , ::int2 >::value ||
                     std::is_same< AliasType , ::int4 >::value )
                 , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

    // Already attached: reuse the cached handle.
    if ( m_tex_obj != 0 ) { return m_tex_obj ; }

    m_tex_obj = attach_texture_object( sizeof(AliasType)
                                     , (void*) RecordBase::m_alloc_ptr
                                     , RecordBase::m_alloc_size );
    return m_tex_obj ;
  }

  /** Offset of ptr, counted in AliasType elements, from RecordBase::m_alloc_ptr.
   *
   *  The texture object is attached to the entire allocation range, so the
   *  offset is measured from the record's allocation pointer.
   */
  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
  {
    const AliasType * const range_begin =
      reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
    return ptr - range_begin ;
  }

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
};
/** Allocation record specialization for Kokkos::CudaUVMSpace.
 *
 *  Derives from the untyped record base and adds a Kokkos::CudaUVMSpace
 *  member plus a texture object handle that is created on first request.
 *  Copying is disabled; construction and destruction are protected.
 */
template<>
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  using RecordBase = SharedAllocationRecord< void , void > ;

  // Not copyable.
  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  // Default deallocation callback handed to the record base by the constructor.
  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t      m_tex_obj ;
  const Kokkos::CudaUVMSpace m_space ;

protected:

  ~SharedAllocationRecord();

  SharedAllocationRecord()
    : RecordBase()
    , m_tex_obj( 0 )
    , m_space()
    {}

  SharedAllocationRecord( const Kokkos::CudaUVMSpace     & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
                                          , const std::string          & arg_label
                                          , const size_t                 arg_alloc_size
                                          );

  /** Return the texture object for this record, creating it on the first call.
   *
   *  AliasType must be int, ::int2, or ::int4 (checked at compile time).
   *  Creation is done by the static attach_texture_object of the CudaSpace
   *  record. A handle value of 0 means "not yet attached".
   */
  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
  {
    static_assert( ( std::is_same< AliasType , int >::value ||
                     std::is_same< AliasType , ::int2 >::value ||
                     std::is_same< AliasType , ::int4 >::value )
                 , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

    // Already attached: reuse the cached handle.
    if ( m_tex_obj != 0 ) { return m_tex_obj ; }

    m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
      attach_texture_object( sizeof(AliasType)
                           , (void*) RecordBase::m_alloc_ptr
                           , RecordBase::m_alloc_size );
    return m_tex_obj ;
  }

  /** Offset of ptr, counted in AliasType elements, from RecordBase::m_alloc_ptr.
   *
   *  The texture object is attached to the entire allocation range, so the
   *  offset is measured from the record's allocation pointer.
   */
  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
  {
    const AliasType * const range_begin =
      reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
    return ptr - range_begin ;
  }

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
};
/** Allocation record specialization for Kokkos::CudaHostPinnedSpace.
 *
 *  Derives from the untyped record base and adds a
 *  Kokkos::CudaHostPinnedSpace member. Unlike the CudaSpace and CudaUVMSpace
 *  records it carries no texture object.
 *  Copying is disabled; construction and destruction are protected.
 */
template<>
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  using RecordBase = SharedAllocationRecord< void , void > ;

  // Not copyable.
  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  // Default deallocation callback handed to the record base by the constructor.
  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  const Kokkos::CudaHostPinnedSpace m_space ;

protected:

  ~SharedAllocationRecord();

  SharedAllocationRecord()
    : RecordBase()
    , m_space()
    {}

  SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
                        , const std::string                 & arg_label
                        , const size_t                        arg_alloc_size
                        , const RecordBase::function_type     arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
                                          , const std::string                 & arg_label
                                          , const size_t                        arg_alloc_size
                                          );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
};
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */

View File

@ -1,497 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXECPOLICY_HPP
#define KOKKOS_EXECPOLICY_HPP
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
#include <impl/Kokkos_Tags.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Execution policy for work over a range of an integral type.
*
* Valid template argument options:
*
* With a specified execution space:
* < ExecSpace , WorkTag , { IntConst | IntType } >
* < ExecSpace , WorkTag , void >
* < ExecSpace , { IntConst | IntType } , void >
* < ExecSpace , void , void >
*
* With the default execution space:
* < WorkTag , { IntConst | IntType } , void >
* < WorkTag , void , void >
* < { IntConst | IntType } , void , void >
* < void , void , void >
*
* IntType is a fundamental integral type
* IntConst is an Impl::integral_constant< IntType , Blocking >
*
* Blocking is the granularity of partitioning the range among threads.
*/
template< class Arg0 = void , class Arg1 = void , class Arg2 = void
, class ExecSpace =
// The first argument is the execution space,
// otherwise use the default execution space.
typename Impl::if_c< Impl::is_execution_space< Arg0 >::value , Arg0
, Kokkos::DefaultExecutionSpace >::type
>
class RangePolicy {
private:
// Default integral type and blocking factor:
typedef int DefaultIntType ;
enum { DefaultIntValue = 8 };
// Classification of each template argument: left as void, an execution
// space, an integral constant, or a plain integral type.
enum { Arg0_Void = Impl::is_same< Arg0 , void >::value };
enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
enum { Arg2_Void = Impl::is_same< Arg2 , void >::value };
enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
enum { Arg0_IntConst = Impl::is_integral_constant< Arg0 >::value };
enum { Arg1_IntConst = Impl::is_integral_constant< Arg1 >::value };
enum { Arg2_IntConst = Impl::is_integral_constant< Arg2 >::value };
enum { Arg0_IntType = Impl::is_integral< Arg0 >::value };
enum { Arg1_IntType = Impl::is_integral< Arg1 >::value };
enum { Arg2_IntType = Impl::is_integral< Arg2 >::value };
// Arg0 is a work tag when it is none of the categories above.
// Arg1 can only be a work tag when Arg0 is the execution space.
enum { Arg0_WorkTag = ! Arg0_ExecSpace && ! Arg0_IntConst && ! Arg0_IntType && ! Arg0_Void };
enum { Arg1_WorkTag = Arg0_ExecSpace && ! Arg1_IntConst && ! Arg1_IntType && ! Arg1_Void };
// One line per accepted argument combination, in the order listed in the
// class documentation.
// NOTE(review): Impl::StaticAssert presumably fails to compile when the
// condition is false - confirm in impl/Kokkos_StaticAssert.hpp.
enum { ArgOption_OK = Impl::StaticAssert< (
( Arg0_ExecSpace && Arg1_WorkTag && ( Arg2_IntConst || Arg2_IntType ) ) ||
( Arg0_ExecSpace && Arg1_WorkTag && Arg2_Void ) ||
( Arg0_ExecSpace && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
( Arg0_ExecSpace && Arg1_Void && Arg2_Void ) ||
( Arg0_WorkTag && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
( Arg0_WorkTag && Arg1_Void && Arg2_Void ) ||
( ( Arg0_IntConst || Arg0_IntType ) && Arg1_Void && Arg2_Void ) ||
( Arg0_Void && Arg1_Void && Arg2_Void )
) >::value };
// The work argument tag is the first or second argument
// (void when neither argument is a work tag).
typedef typename Impl::if_c< Arg0_WorkTag , Arg0 ,
typename Impl::if_c< Arg1_WorkTag , Arg1 , void
>::type >::type
WorkTag ;
// Blocking factor: the value of the first argument that is an integral
// constant, otherwise DefaultIntValue.
enum { Granularity = Arg0_IntConst ? unsigned(Impl::is_integral_constant<Arg0>::integral_value) : (
Arg1_IntConst ? unsigned(Impl::is_integral_constant<Arg1>::integral_value) : (
Arg2_IntConst ? unsigned(Impl::is_integral_constant<Arg2>::integral_value) : (
unsigned(DefaultIntValue) ))) };
// Only accept the integral type if the blocking is a power of two
// Selection order: a plain integral type argument (Arg0, Arg1, Arg2), then
// the integral type of an integral constant argument, then DefaultIntType.
typedef typename Impl::enable_if< Impl::is_power_of_two< Granularity >::value ,
typename Impl::if_c< Arg0_IntType , Arg0 ,
typename Impl::if_c< Arg1_IntType , Arg1 ,
typename Impl::if_c< Arg2_IntType , Arg2 ,
typename Impl::if_c< Arg0_IntConst , typename Impl::is_integral_constant<Arg0>::integral_type ,
typename Impl::if_c< Arg1_IntConst , typename Impl::is_integral_constant<Arg1>::integral_type ,
typename Impl::if_c< Arg2_IntConst , typename Impl::is_integral_constant<Arg2>::integral_type ,
DefaultIntType
>::type >::type >::type
>::type >::type >::type
>::type
IntType ;
// Mask of the low bits below Granularity; used by WorkRange to round a
// partition size up to a multiple of Granularity.
enum { GranularityMask = IntType(Granularity) - 1 };
ExecSpace m_space ;
IntType m_begin ;
IntType m_end ;
public:
//! Tag this class as an execution policy
typedef ExecSpace execution_space ;
typedef RangePolicy execution_policy ;
typedef WorkTag work_tag ;
typedef IntType member_type ;
// Accessors for the execution space instance and the stored range bounds.
KOKKOS_INLINE_FUNCTION const execution_space & space() const { return m_space ; }
KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
// Default policy: default-constructed execution space, begin and end both 0.
inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
/** \brief Total range */
// Uses a default-constructed execution space.
// When work_begin is not less than work_end, both bounds are stored as 0.
inline
RangePolicy( const member_type work_begin
, const member_type work_end
)
: m_space()
, m_begin( work_begin < work_end ? work_begin : 0 )
, m_end( work_begin < work_end ? work_end : 0 )
{}
/** \brief Total range */
// Same as above but with the given execution space instance.
inline
RangePolicy( const execution_space & work_space
, const member_type work_begin
, const member_type work_end
)
: m_space( work_space )
, m_begin( work_begin < work_end ? work_begin : 0 )
, m_end( work_begin < work_end ? work_end : 0 )
{}
/** \brief Subrange for a partition's rank and size.
*
* Typically used to partition a range over a group of threads.
*/
struct WorkRange {
typedef RangePolicy::work_tag work_tag ;
typedef RangePolicy::member_type member_type ;
KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
/** \brief Subrange for a partition's rank and size.
*
* Typically used to partition a range over a group of threads.
*/
// range     : the full range being partitioned
// part_rank : index of this partition
// part_size : number of partitions; when 0 the subrange stays at begin = end = 0
KOKKOS_INLINE_FUNCTION
WorkRange( const RangePolicy & range
, const int part_rank
, const int part_size
)
: m_begin(0), m_end(0)
{
if ( part_size ) {
// Split evenly among partitions, then round up to the granularity.
const member_type work_part =
( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
+ GranularityMask ) & ~member_type(GranularityMask);
m_begin = range.begin() + work_part * part_rank ;
m_end = m_begin + work_part ;
// Clip both bounds to the end of the full range, because rounding up can
// push the computed bounds past range.end().
if ( range.end() < m_begin ) m_begin = range.end() ;
if ( range.end() < m_end ) m_end = range.end() ;
}
}
private:
member_type m_begin ;
member_type m_end ;
// Declared private with no definition in this class: default construction
// and assignment are not available to users of WorkRange.
WorkRange();
WorkRange & operator = ( const WorkRange & );
};
};
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Execution policy for parallel work over a league of teams of threads.
*
* The work functor is called for each thread of each team such that
* the team's member threads are guaranteed to be concurrent.
*
* The team's threads have access to team shared scratch memory and
* team collective operations.
*
* If the WorkTag is non-void then the first calling argument of the
* work functor's parentheses operator is 'const WorkTag &'.
* This allows a functor to have multiple work member functions.
*
* template argument option with specified execution space:
* < ExecSpace , WorkTag >
* < ExecSpace , void >
*
* template argument option with default execution space:
* < WorkTag , void >
* < void , void >
*/
template< class Arg0 = void
, class Arg1 = void
, class ExecSpace =
// If the first argument is not an execution space
// then use the default execution space.
typename Impl::if_c< Impl::is_execution_space< Arg0 >::value , Arg0
, Kokkos::DefaultExecutionSpace >::type
>
class TeamPolicy {
private:
// Arg0 is an execution space; Arg1 was left as void.
enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
// A second argument is only accepted when the first is the execution space.
// NOTE(review): Impl::StaticAssert presumably fails to compile when the
// condition is false - confirm in impl/Kokkos_StaticAssert.hpp.
enum { ArgOption_OK = Impl::StaticAssert< ( Arg0_ExecSpace || Arg1_Void ) >::value };
// The work tag is Arg1 when Arg0 is the execution space, otherwise Arg0.
typedef typename Impl::if_c< Arg0_ExecSpace , Arg1 , Arg0 >::type WorkTag ;
public:
//! Tag this class as an execution policy
typedef TeamPolicy execution_policy ;
typedef ExecSpace execution_space ;
typedef WorkTag work_tag ;
//----------------------------------------
/** \brief Query maximum team size for a given functor.
*
* This size takes into account execution space concurrency limitations and
* scratch memory space limitations for reductions, team reduce/scan, and
* team shared memory.
*/
template< class FunctorType >
static int team_size_max( const FunctorType & );
/** \brief Query recommended team size for a given functor.
*
* This size takes into account execution space concurrency limitations and
* scratch memory space limitations for reductions, team reduce/scan, and
* team shared memory.
*/
template< class FunctorType >
static int team_size_recommended( const FunctorType & );
// Overload taking an additional integer argument.
// NOTE(review): the meaning of the second argument is not stated in this
// header - confirm against the execution space implementations.
template< class FunctorType >
static int team_size_recommended( const FunctorType & , const int&);
//----------------------------------------
/** \brief Construct policy with the given instance of the execution space */
TeamPolicy( const execution_space & , int league_size_request , int team_size_request );
/** \brief Construct policy with the default instance of the execution space */
TeamPolicy( int league_size_request , int team_size_request );
/** \brief The actual league size (number of teams) of the policy.
*
* This may be smaller than the requested league size due to limitations
* of the execution space.
*/
KOKKOS_INLINE_FUNCTION int league_size() const ;
/** \brief The actual team size (number of threads per team) of the policy.
*
* This may be smaller than the requested team size due to limitations
* of the execution space.
*/
KOKKOS_INLINE_FUNCTION int team_size() const ;
/** \brief Parallel execution of a functor calls the functor once with
* each member of the execution policy.
*/
struct member_type {
/** \brief Handle to the currently executing team shared scratch memory */
KOKKOS_INLINE_FUNCTION
typename execution_space::scratch_memory_space team_shmem() const ;
/** \brief Rank of this team within the league of teams */
KOKKOS_INLINE_FUNCTION int league_rank() const ;
/** \brief Number of teams in the league */
KOKKOS_INLINE_FUNCTION int league_size() const ;
/** \brief Rank of this thread within this team */
KOKKOS_INLINE_FUNCTION int team_rank() const ;
/** \brief Number of threads in this team */
KOKKOS_INLINE_FUNCTION int team_size() const ;
/** \brief Barrier among the threads of this team */
KOKKOS_INLINE_FUNCTION void team_barrier() const ;
/** \brief Intra-team reduction. Returns join of all values of the team members. */
template< class JoinOp >
KOKKOS_INLINE_FUNCTION
typename JoinOp::value_type team_reduce( const typename JoinOp::value_type
, const JoinOp & ) const ;
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const ;
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
* with intra-team non-deterministic ordering accumulation.
*
* The global inter-team accumulation value will, at the end of the
* league's parallel execution, be the scan's total.
* Parallel execution ordering of the league's teams is non-deterministic.
* As such the base value for each team's scan operation is similarly
* non-deterministic.
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const ;
};
};
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
/** Loop bounds for one thread of a team.
 *
 *  The index range [begin,end) is cut into team_size() chunks of length
 *  ( end - begin + team_size - 1 ) / team_size. `start` is the first index
 *  of the chunk belonging to team_rank(); `end` is the start of the next
 *  chunk, limited to the end of the whole range.
 */
template<typename iType, class TeamMemberType>
struct TeamThreadRangeBoundariesStruct {
private:

  // First index of the chunk owned by `rank`.
  KOKKOS_INLINE_FUNCTION static
  iType ibegin( const iType & lo
              , const iType & hi
              , const iType & rank
              , const iType & nthread
              )
  {
    return lo + ( ( hi - lo + nthread - 1 ) / nthread ) * rank ;
  }

  // One past the chunk owned by `rank`, never beyond `hi`.
  KOKKOS_INLINE_FUNCTION static
  iType iend( const iType & lo
            , const iType & hi
            , const iType & rank
            , const iType & nthread
            )
  {
    const iType chunk_end = lo + ( ( hi - lo + nthread - 1 ) / nthread ) * ( rank + 1 );
    if ( chunk_end < hi ) { return chunk_end ; }
    return hi ;
  }

public:

  typedef iType index_type;

  const iType start;
  const iType end;
  enum {increment = 1};
  const TeamMemberType& thread;

  /** Bounds for the range that begins at 0 and ends at arg_end. */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
                                 , const iType& arg_end
                                 )
    : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , thread( arg_thread )
    {}

  /** Bounds for the range that begins at arg_begin and ends at arg_end. */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
                                 , const iType& arg_begin
                                 , const iType& arg_end
                                 )
    : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , thread( arg_thread )
    {}
};
/** Loop bounds with a fixed start of 0, a fixed increment of 1,
 *  and an end equal to the count given at construction.
 */
template<typename iType, class TeamMemberType>
struct ThreadVectorRangeBoundariesStruct {

  typedef iType index_type;

  enum {start = 0};
  const iType end;
  enum {increment = 1};

  // The team member argument is accepted but not used.
  KOKKOS_INLINE_FUNCTION
  ThreadVectorRangeBoundariesStruct( const TeamMemberType& thread
                                   , const iType& count
                                   )
    : end( count )
    {}
};
/** Holds a reference to a team member; the referenced object must outlive this struct. */
template<class TeamMemberType>
struct ThreadSingleStruct {

  const TeamMemberType& team_member;

  KOKKOS_INLINE_FUNCTION
  ThreadSingleStruct( const TeamMemberType& member )
    : team_member( member )
    {}
};
/** Holds a reference to a team member; the referenced object must outlive this struct. */
template<class TeamMemberType>
struct VectorSingleStruct {

  const TeamMemberType& team_member;

  KOKKOS_INLINE_FUNCTION
  VectorSingleStruct( const TeamMemberType& member )
    : team_member( member )
    {}
};
} // namespace Impl
/** \brief Execution policy for parallel work over the threads within a team.
*
* The range is split over all threads in a team. The mapping scheme depends on the architecture.
* This policy is used together with a parallel pattern as a nested layer within a kernel launched
* with the TeamPolicy. This variant expects a single count, so the range is the half-open
* interval [0,count).
*/
template<typename iType, class TeamMemberType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& count);
/** \brief Execution policy for parallel work over the threads within a team.
*
* The range is split over all threads in a team. The mapping scheme depends on the architecture.
* This policy is used together with a parallel pattern as a nested layer within a kernel launched
* with the TeamPolicy. This variant expects a begin and an end, so the range is the half-open
* interval [begin,end).
*/
template<typename iType, class TeamMemberType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& begin, const iType& end);
/** \brief Execution policy for a vector parallel loop.
*
* The range is split over all vector lanes in a thread. The mapping scheme depends on the architecture.
* This policy is used together with a parallel pattern as a nested layer within a kernel launched
* with the TeamPolicy. This variant expects a single count, so the range is the half-open
* interval [0,count).
*/
template<typename iType, class TeamMemberType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType& count);
} // namespace Kokkos
#endif /* #define KOKKOS_EXECPOLICY_HPP */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -1,270 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HOSTSPACE_HPP
#define KOKKOS_HOSTSPACE_HPP
#include <cstring>
#include <string>
#include <iosfwd>
#include <typeinfo>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_BasicAllocators.hpp>
#include <impl/KokkosExp_SharedAlloc.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/// \brief Initialize the lock array for arbitrary size atomics.
///
/// Arbitrary size atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
void init_lock_array_host_space();
/// \brief Acquire a lock for the address
///
/// This function tries to acquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully acquired the
/// function returns true. Otherwise it returns false.
bool lock_address_host_space(void* ptr);
/// \brief Release lock for the address
///
/// This function releases the lock for the hash value derived
/// from the provided ptr. This function should only be called
/// after previously successfully acquiring a lock with
/// lock_address_host_space.
void unlock_address_host_space(void* ptr);
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
/// \class HostSpace
/// \brief Memory management for host memory.
///
/// HostSpace is a memory space that governs host memory. "Host"
/// memory means the usual CPU-accessible memory.
class HostSpace {
public:
//! Tag this class as a kokkos memory space
typedef HostSpace memory_space ;
typedef size_t size_type ;
/// \typedef execution_space
/// \brief Default execution space for this memory space.
///
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
// Selection order: an explicitly configured default host device (OpenMP,
// then Threads) takes precedence; otherwise the first enabled back end of
// OpenMP, Threads (pthread), Serial is used. With none enabled the build
// stops with the #error below.
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
#endif
//! This memory space preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
// Allocator type: Impl::PageAlignedAllocator when
// KOKKOS_USE_PAGE_ALIGNED_HOST_MEMORY is defined, Impl::AlignedAllocator otherwise.
#if defined( KOKKOS_USE_PAGE_ALIGNED_HOST_MEMORY )
typedef Impl::PageAlignedAllocator allocator ;
#else
typedef Impl::AlignedAllocator allocator ;
#endif
/** \brief Allocate a contiguous block of memory.
*
* The input label is associated with the block of memory.
* The block of memory is tracked via reference counting where
* allocation gives it a reference count of one.
*/
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
/*--------------------------------*/
/* Functions unique to the HostSpace */
// NOTE(review): presumably register_in_parallel installs the callback that
// in_parallel() consults - confirm against the implementation file.
static int in_parallel();
static void register_in_parallel( int (*)() );
/*--------------------------------*/
/**\brief Default memory space instance */
HostSpace();
HostSpace( const HostSpace & rhs ) = default ;
HostSpace & operator = ( const HostSpace & ) = default ;
~HostSpace() = default ;
/**\brief Non-default memory space instance to choose allocation mechanism, if available */
enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC };
explicit
HostSpace( const AllocationMechanism & );
/**\brief Allocate memory in the host space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate memory in the host space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
private:
// Allocation mechanism associated with this instance.
AllocationMechanism m_alloc_mech ;
// The HostSpace allocation record is granted access to private members.
friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ;
};
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
/** Allocation record specialization for Kokkos::HostSpace.
 *
 *  Derives from the untyped record base and adds a Kokkos::HostSpace member.
 *  Copying is disabled; construction and destruction are protected, so
 *  records are obtained through allocate().
 */
template<>
class SharedAllocationRecord< Kokkos::HostSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  friend Kokkos::HostSpace ;

  using RecordBase = SharedAllocationRecord< void , void > ;

  // Not copyable.
  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  // Default deallocation callback handed to the record base by the constructor.
  static void deallocate( RecordBase * );

  /**\brief  Root record for tracked allocations from this HostSpace instance */
  static RecordBase s_root_record ;

  const Kokkos::HostSpace m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() = default ;

  SharedAllocationRecord( const Kokkos::HostSpace        & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  /** Copy of the label stored in the record header. */
  inline
  std::string get_label() const
  {
    std::string label( RecordBase::head()->m_label );
    return label ;
  }

  /** Create a new record with operator new.
   *
   *  Only performed when KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is
   *  defined; in any other compilation pass a null pointer is returned.
   */
  KOKKOS_INLINE_FUNCTION static
  SharedAllocationRecord * allocate( const Kokkos::HostSpace & arg_space
                                   , const std::string       & arg_label
                                   , const size_t              arg_alloc_size
                                   )
  {
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    return static_cast< SharedAllocationRecord * >( 0 );
#else
    return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
#endif
  }

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  static void print_records( std::ostream & , const Kokkos::HostSpace & , bool detail = false );
};
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// Primary template: only declared here, so every use requires a specialization.
template< class , class > struct DeepCopy ;
/// Specialization for a HostSpace destination and a HostSpace source.
/// Only the constructor is declared in this header.
/// NOTE(review): presumably the constructor copies n bytes from src to dst;
/// the definition is not visible here -- confirm against the implementation.
template<>
struct DeepCopy<HostSpace,HostSpace> {
DeepCopy( void * dst , const void * src , size_t n );
};
} // namespace Impl
} // namespace Kokkos
#endif /* #define KOKKOS_HOSTSPACE_HPP */

View File

@ -1,174 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Layout.hpp
/// \brief Declaration of various \c MemoryLayout options.
#ifndef KOKKOS_LAYOUT_HPP
#define KOKKOS_LAYOUT_HPP
#include <stddef.h>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>
namespace Kokkos {
//----------------------------------------------------------------------------
/// \struct LayoutLeft
/// \brief Memory layout tag indicating left-to-right (Fortran scheme)
/// striding of multi-indices.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View. The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// "Layout left" indicates a mapping where the leftmost index i0
/// refers to contiguous access, and strides increase for dimensions
/// going right from there (i1, i2, ...). This layout imitates how
/// Fortran stores multi-dimensional arrays. For the special case of
/// a two-dimensional array, "layout left" is also called "column
/// major."
// Empty tag type: it declares no data members, only the self-referencing
// array_layout typedef below.
struct LayoutLeft {
//! Tag this class as a kokkos array layout
typedef LayoutLeft array_layout ;
};
//----------------------------------------------------------------------------
/// \struct LayoutRight
/// \brief Memory layout tag indicating right-to-left (C or
/// lexicographical scheme) striding of multi-indices.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View. The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// "Right layout" indicates a mapping where the rightmost index ik
/// refers to contiguous access, and strides increase for dimensions
/// going left from there. This layout imitates how C stores
/// multi-dimensional arrays. For the special case of a
/// two-dimensional array, "layout right" is also called "row major."
// Empty tag type: it declares no data members, only the self-referencing
// array_layout typedef below.
struct LayoutRight {
//! Tag this class as a kokkos array layout
typedef LayoutRight array_layout ;
};
//----------------------------------------------------------------------------
/// \struct LayoutStride
/// \brief Memory layout tag indicating arbitrarily strided
/// multi-index mapping into contiguous memory.
struct LayoutStride {
  //! Tag this class as a kokkos array layout
  typedef LayoutStride array_layout ;

  //! Capacity of the dimension[] and stride[] arrays.
  enum { MAX_RANK = 8 };

  size_t dimension[ MAX_RANK ] ;
  size_t stride[ MAX_RANK ] ;

  /** \brief  Compute strides from ordered dimensions.
   *
   *  Values of order uniquely form the set [0..rank)
   *  and specify ordering of the dimensions.
   *  Order = {0,1,2,...} is LayoutLeft
   *  Order = {...,2,1,0} is LayoutRight
   *
   *  \param rank   number of dimensions; must lie in [0..MAX_RANK]
   *  \param order  array of at least \c rank entries; order[0] names the
   *                dimension that receives stride 1, order[1] the next, ...
   *  \param dimen  array of at least \c rank extents, indexed by dimension
   *
   *  \return  A layout whose first \c rank dimension/stride entries are
   *           filled in and whose remaining entries are zero.  If \c rank
   *           is out of range or \c order is not a permutation of
   *           [0..rank) every entry of the returned layout is zero.
   *
   *  Only the first \c rank entries of \c order and \c dimen are read.
   */
  template< typename iTypeOrder , typename iTypeDimen >
  KOKKOS_INLINE_FUNCTION static
  LayoutStride order_dimensions( int const rank
                               , iTypeOrder const * const order
                               , iTypeDimen const * const dimen )
    {
      LayoutStride tmp ;

      for ( int r = 0 ; r < MAX_RANK ; ++r ) {
        tmp.dimension[r] = 0 ;
        tmp.stride[r]    = 0 ;
      }

      // Verify valid rank and rank order.  Each of the 'rank' entries must
      // be a distinct value in [0..rank), which makes 'order' a permutation.
      bool valid = ( 0 <= rank ) && ( rank <= int( MAX_RANK ) );

      unsigned seen = 0 ; // bit k set <=> dimension k already named by 'order'

      for ( int r = 0 ; valid && r < rank ; ++r ) {
        const int k = int( order[r] );
        // Range test comes first so the shift below is always well defined.
        if ( k < 0 || rank <= k || 0 != ( seen & ( 1u << k ) ) ) {
          valid = false ;
        }
        else {
          seen |= ( 1u << k );
        }
      }

      if ( valid ) {
        size_t n = 1 ;
        for ( int r = 0 ; r < rank ; ++r ) {
          tmp.stride[ order[r] ] = n ;
          n *= ( dimen[order[r]] );
          tmp.dimension[r] = dimen[r];
        }
      }

      return tmp ;
    }
};
//----------------------------------------------------------------------------
/// \struct LayoutTileLeft
/// \brief Memory layout tag indicating left-to-right (Fortran scheme)
/// striding of multi-indices by tiles.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View. The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// "Tiled layout" indicates a mapping to contiguously stored
/// <tt>ArgN0</tt> by <tt>ArgN1</tt> tiles for the rightmost two
/// dimensions. Indices are LayoutLeft within each tile, and the
/// tiles themselves are arranged using LayoutLeft. Note that the
/// dimensions <tt>ArgN0</tt> and <tt>ArgN1</tt> of the tiles must be
/// compile-time constants. This speeds up index calculations. If
/// both tile dimensions are powers of two, Kokkos can optimize
/// further.
// Template parameters:
//   ArgN0, ArgN1  -- the two compile-time tile dimensions.
//   IsPowerOfTwo  -- defaults to true exactly when both
//                    Impl::is_power_of_two<ArgN0>::value and
//                    Impl::is_power_of_two<ArgN1>::value are true.
template < unsigned ArgN0 , unsigned ArgN1 ,
bool IsPowerOfTwo = ( Impl::is_power_of_two<ArgN0>::value &&
Impl::is_power_of_two<ArgN1>::value )
>
struct LayoutTileLeft {
//! Tag this class as a kokkos array layout
typedef LayoutTileLeft<ArgN0,ArgN1,IsPowerOfTwo> array_layout ;
// Expose the tile dimensions as compile-time constants.
enum { N0 = ArgN0 };
enum { N1 = ArgN1 };
};
} // namespace Kokkos
#endif // #ifndef KOKKOS_LAYOUT_HPP

View File

@ -1,397 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
//----------------------------------------------------------------------------
/** Pick up configure/build options via #define macros:
*
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_HAVE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
* KOKKOS_HAVE_CXX11 enable C++11 features
*
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
*
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
*/
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#endif
//----------------------------------------------------------------------------
/** Pick up compiler specific #define macros:
*
* Macros for known compilers evaluate to an integral version value
*
* KOKKOS_COMPILER_NVCC
* KOKKOS_COMPILER_GNU
* KOKKOS_COMPILER_INTEL
* KOKKOS_COMPILER_IBM
* KOKKOS_COMPILER_CRAYC
* KOKKOS_COMPILER_APPLECC
* KOKKOS_COMPILER_CLANG
* KOKKOS_COMPILER_PGI
*
* Macros for which compiler extension to use for atomics on intrinsic types
*
* KOKKOS_ATOMICS_USE_CUDA
* KOKKOS_ATOMICS_USE_GNU
* KOKKOS_ATOMICS_USE_INTEL
* KOKKOS_ATOMICS_USE_OPENMP31
*
* A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use.
*
* Macros for marking functions to run in an execution space:
*
* KOKKOS_FUNCTION
* KOKKOS_INLINE_FUNCTION request compiler to inline
* KOKKOS_FORCEINLINE_FUNCTION force compiler to inline, use with care!
*/
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
/* Compiling with a CUDA compiler.
*
* Include <cuda.h> to pick up the CUDA_VERSION macro defined as:
* CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 )
*
* When generating device code the __CUDA_ARCH__ macro is defined as:
* __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 )
*/
#include <cuda_runtime.h>
#include <cuda.h>
// Stop early if <cuda.h> did not provide the version macro tested below.
#if ! defined( CUDA_VERSION )
#error "#include <cuda.h> did not define CUDA_VERSION"
#endif
// 6050 encodes CUDA 6.5 in the CUDA_VERSION scheme described above.
#if ( CUDA_VERSION < 6050 )
// CUDA supports (unofficially) C++11 in device code starting with
// version 6.5. This includes auto type and device code internal
// lambdas.
#error "Cuda version 6.5 or greater required"
#endif
// This check can only trigger where __CUDA_ARCH__ is defined;
// 300 encodes compute capability 3.0.
#if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 )
/* Compiling with CUDA compiler for device code. */
#error "Cuda device capability >= 3.0 is required"
#endif
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
/*--------------------------------------------------------------------------*/
/* Language info: C++, CUDA, OPENMP */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
// Compiling with a CUDA compiler.
// The condition tests __CUDACC__ rather than __CUDA_ARCH__ on purpose:
// __CUDA_ARCH__ is only defined while device code is being generated, but
// functions must carry the same __host__ __device__ markings in the host
// pass as well, otherwise the two passes disagree about which functions
// are callable on the device.
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
#define KOKKOS_INLINE_FUNCTION __device__ __host__ inline
#define KOKKOS_FUNCTION __device__ __host__
#endif /* #if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA ) */
#if defined( _OPENMP )
/* Compiling with OpenMP.
* The value of _OPENMP is an integer value YYYYMM
* where YYYY and MM are the year and month designation
* of the supported OpenMP API version.
*/
#endif /* #if defined( _OPENMP ) */
/*--------------------------------------------------------------------------*/
/* Mapping compiler built-ins to KOKKOS_COMPILER_*** macros */
#if defined( __NVCC__ )
// NVIDIA compiler is being used.
// Code is parsed and separated into host and device code.
// Host code is compiled again with another compiler.
// Device code is compiled to 'ptx'.
#define KOKKOS_COMPILER_NVCC __NVCC__
#else
// Not compiling with NVCC: lambda dispatch is enabled whenever C++11 is
// enabled and the macro was not already defined.
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
// CUDA (including version 6.5) does not support giving lambdas as
// arguments to global functions. Thus it is not currently possible
// to dispatch lambdas from the host when NVCC is used.
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif /* #if defined( __NVCC__ ) */
// Default lambda introducer (capture by copy), unless already defined.
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
#define KOKKOS_LAMBDA [=]
#endif
#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */
/* Identify the host compiler.  Each KOKKOS_COMPILER_* macro evaluates to an
 * integral version value.  Computed values are fully parenthesized so that
 * the macros can be used safely inside larger expressions.
 */
/* Intel compiler for host code */
#if defined( __INTEL_COMPILER )
#define KOKKOS_COMPILER_INTEL __INTEL_COMPILER
#elif defined( __ICC )
// Old define
#define KOKKOS_COMPILER_INTEL __ICC
#elif defined( __ECC )
// Very old define
#define KOKKOS_COMPILER_INTEL __ECC
#endif
/* CRAY compiler for host code */
#if defined( _CRAYC )
#define KOKKOS_COMPILER_CRAYC _CRAYC
#endif
#if defined( __IBMCPP__ )
// IBM C++
#define KOKKOS_COMPILER_IBM __IBMCPP__
#elif defined( __IBMC__ )
#define KOKKOS_COMPILER_IBM __IBMC__
#endif
#if defined( __APPLE_CC__ )
#define KOKKOS_COMPILER_APPLECC __APPLE_CC__
#endif
// Clang is only reported when the Intel compiler has not been detected.
#if defined (__clang__) && !defined (KOKKOS_COMPILER_INTEL)
#define KOKKOS_COMPILER_CLANG ( __clang_major__*100+__clang_minor__*10+__clang_patchlevel__ )
#endif
// GNU is only reported for compilers that define __GNUC__ but are neither
// Clang nor Intel.
#if ! defined( __clang__ ) && ! defined( KOKKOS_COMPILER_INTEL ) && defined( __GNUC__ )
#define KOKKOS_COMPILER_GNU ( __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__ )
#endif
// PGI is only reported when __GNUC__ is not defined.
#if defined( __PGIC__ ) && ! defined( __GNUC__ )
#define KOKKOS_COMPILER_PGI ( __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ )
#endif
#endif /* #if ! defined( __CUDA_ARCH__ ) */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* Intel compiler macros */
#if defined( KOKKOS_COMPILER_INTEL )
// Flag the pragma families (unroll, ivdep, loop count, vector, simd)
// as available with this compiler.
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
#define KOKKOS_HAVE_PRAGMA_SIMD 1
// Version value 1200 or greater and not Windows: define KOKKOS_ENABLE_ASM
// unless it is already defined.
#if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 )
#define KOKKOS_ENABLE_ASM 1
#endif
// Version value 1200 or greater: provide the force-inline marking unless an
// earlier section defined it. The always_inline attribute is used except
// on Windows, where plain 'inline' is used.
#if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#if !defined (_WIN32)
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#else
#define KOKKOS_FORCEINLINE_FUNCTION inline
#endif
#endif
// Placeholder: nothing is configured specifically for __MIC__ here.
#if defined( __MIC__ )
// Compiling for Xeon Phi
#endif
#endif
/*--------------------------------------------------------------------------*/
/* Cray compiler macros */
// Placeholder: no Cray-specific macros are defined here.
#if defined( KOKKOS_COMPILER_CRAYC )
#endif
/*--------------------------------------------------------------------------*/
/* IBM Compiler macros */
#if defined( KOKKOS_COMPILER_IBM )
// Only the unroll pragma is flagged as available; the remaining
// pragma flags are deliberately left disabled (commented out).
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#endif
/*--------------------------------------------------------------------------*/
/* CLANG compiler macros */
#if defined( KOKKOS_COMPILER_CLANG )
// No pragma flags are enabled for Clang (all are commented out).
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
// Provide the force-inline marking unless an earlier section defined it.
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#endif
#endif
/*--------------------------------------------------------------------------*/
/* GNU Compiler macros */
#if defined( KOKKOS_COMPILER_GNU )
// No pragma flags are enabled for GNU (all are commented out).
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
// Provide the force-inline marking unless an earlier section defined it.
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
#endif
// Define KOKKOS_ENABLE_ASM unless it is already defined, or one of the
// listed PowerPC target macros or __PGIC__ is defined.
#if ! defined( KOKKOS_ENABLE_ASM ) && \
! ( defined( __powerpc) || \
defined(__powerpc__) || \
defined(__powerpc64__) || \
defined(__POWERPC__) || \
defined(__ppc__) || \
defined(__ppc64__) || \
defined(__PGIC__) )
#define KOKKOS_ENABLE_ASM 1
#endif
#endif
/*--------------------------------------------------------------------------*/
/* PGI compiler macros */
#if defined( KOKKOS_COMPILER_PGI )
// The unroll, ivdep and vector pragmas are flagged as available;
// loop count and simd are left disabled (commented out).
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#endif
/*--------------------------------------------------------------------------*/
/* NVCC compiler macros */
// The unroll pragma is flagged as available only while NVCC is
// generating device code (__CUDA_ARCH__ defined).
#if defined( KOKKOS_COMPILER_NVCC )
#if defined(__CUDA_ARCH__ )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#endif
#endif
//----------------------------------------------------------------------------
/** Define function marking macros if compiler specific macros are undefined: */
// Fallbacks used when no earlier section defined the marking macros:
// both inline markings become plain 'inline' and KOKKOS_FUNCTION
// expands to nothing but a comment.
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline
#endif
#if ! defined( KOKKOS_INLINE_FUNCTION )
#define KOKKOS_INLINE_FUNCTION inline
#endif
#if ! defined( KOKKOS_FUNCTION )
#define KOKKOS_FUNCTION /**/
#endif
//----------------------------------------------------------------------------
/** Determine the default execution space for parallel dispatch.
* There is zero or one default execution space specified.
*/
// Count how many of the four default-device macros are defined;
// more than one is reported as a configuration error.
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
#endif
/** If default is not specified then choose from enabled execution spaces.
* Priority: CUDA, OPENMP, THREADS, SERIAL
*/
// The first four branches are intentionally empty: an explicitly specified
// default is kept as is. Otherwise the first enabled space in the order
// CUDA, OpenMP, Pthread is selected, and Serial is the final fallback.
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_HAVE_CUDA )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_HAVE_OPENMP )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_HAVE_PTHREAD )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#else
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
//----------------------------------------------------------------------------
/** Determine for what space the code is being compiled: */
// Exactly one of the two macros below is defined: the CUDA variant when a
// CUDA compiler is generating device code with Kokkos CUDA support enabled,
// the HOST variant in every other case.
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#else
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_MACROS_HPP */

View File

@ -1,116 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_MEMORYTRAITS_HPP
#define KOKKOS_MEMORYTRAITS_HPP
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {

/** \brief  Bit flags describing memory access traits for views
 *          (an extension point).
 *
 *  The flags are meant to be orthogonal.  Should dependencies between
 *  them arise, the MemoryTraits template is the place that must detect
 *  and enforce them.
 *
 *  A value of zero is the default for a View and means that none of
 *  these traits are present.
 */
enum MemoryTraitsFlags
{
  Unmanaged    = 0x01 ,
  RandomAccess = 0x02 ,
  Atomic       = 0x04
};

/** \brief  Compile-time decoding of a combination of MemoryTraitsFlags.
 *
 *  Each nested constant equals the corresponding flag bit when that
 *  bit is set in T and zero otherwise.
 */
template < unsigned T >
struct MemoryTraits {

  //! Tag this class as a kokkos memory traits:
  typedef MemoryTraits memory_traits ;

  enum { Unmanaged    = ( T & static_cast< unsigned >( Kokkos::Unmanaged ) ) };
  enum { RandomAccess = ( T & static_cast< unsigned >( Kokkos::RandomAccess ) ) };
  enum { Atomic       = ( T & static_cast< unsigned >( Kokkos::Atomic ) ) };
};

} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
//! Memory traits with no flag set.
typedef Kokkos::MemoryTraits<0> MemoryManaged ;
//! Memory traits with only the Unmanaged flag set.
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged > MemoryUnmanaged ;
//! Memory traits with both the Unmanaged and the RandomAccess flags set
//! (note that this alias implies Unmanaged).
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::RandomAccess > MemoryRandomAccess ;
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Memory alignment settings
*
* Sets global value for memory alignment. Must be a power of two!
* Enable compatibility of views from different devices with static stride.
* Define KOKKOS_MEMORY_ALIGNMENT (e.g. via a compiler flag) to override
* the default of 128.
*
* NOTE(review): presumably power_of_two<N>::value is log2(N), making
* MEMORY_ALIGNMENT equal to N; the helper is defined elsewhere -- confirm.
* The meaning of MEMORY_ALIGNMENT_THRESHOLD is not evident from this header.
*/
enum { MEMORY_ALIGNMENT =
#if defined( KOKKOS_MEMORY_ALIGNMENT )
( 1 << Kokkos::Impl::power_of_two< KOKKOS_MEMORY_ALIGNMENT >::value )
#else
( 1 << Kokkos::Impl::power_of_two< 128 >::value )
#endif
, MEMORY_ALIGNMENT_THRESHOLD = 4
};
} //namespace Impl
} // namespace Kokkos
#endif /* #ifndef KOKKOS_MEMORYTRAITS_HPP */

View File

@ -1,175 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_OPENMP_HPP
#define KOKKOS_OPENMP_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
#include <omp.h>
#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_Layout.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/// \class OpenMP
/// \brief Kokkos execution space for multicore processors that uses
///        OpenMP and operates in the host memory space.
class OpenMP {
public:

  //------------------------------------
  //! \name Type declarations that all Kokkos devices must provide.
  //@{

  //! Tag this class as a kokkos execution space
  typedef OpenMP     execution_space ;

  //! Memory space used by this execution space
  typedef HostSpace  memory_space ;

  //! This execution space preferred device_type
  typedef Kokkos::Device<execution_space,memory_space> device_type;

  typedef LayoutRight                   array_layout ;
  typedef HostSpace::size_type          size_type ;
  typedef ScratchMemorySpace< OpenMP >  scratch_memory_space ;

  //@}
  //------------------------------------
  //! \name Functions that all Kokkos devices must implement.
  //@{

  /// \brief True while executing inside an OpenMP parallel region,
  ///        as reported by omp_in_parallel().
  static inline bool in_parallel()
    { return 0 != omp_in_parallel() ; }

  /** \brief  Set the device in a "sleep" state. A noop for OpenMP.  */
  static bool sleep();

  /** \brief  Wake the device from the 'sleep' state. A noop for OpenMP. */
  static bool wake();

  /** \brief  Wait until all dispatched functors complete. A noop for OpenMP. */
  static void fence() {}

  /// \brief Print configuration information to the given output stream.
  static void print_configuration( std::ostream & , const bool detail = false );

  /// \brief Free any resources being consumed by the device.
  static void finalize();

  /** \brief  Initialize the device.
   *
   *  1) If the hardware locality library is enabled and OpenMP has not
   *     already bound threads then bind OpenMP threads to maximize
   *     core utilization and group for memory hierarchy locality.
   *
   *  2) Allocate a HostThread for each OpenMP thread to hold its
   *     topology and fan in/out data.
   */
  static void initialize( unsigned thread_count       = 0 ,
                          unsigned use_numa_count     = 0 ,
                          unsigned use_cores_per_numa = 0 );

  static int is_initialized();

  //@}
  //------------------------------------
  /** \brief  This execution space has a topological thread pool which can be queried.
   *
   *  All threads within a pool have a common memory space for which they are cache coherent.
   *    depth = 0  gives the number of threads in the whole pool.
   *    depth = 1  gives the number of threads in a NUMA region, typically sharing L3 cache.
   *    depth = 2  gives the number of threads at the finest granularity, typically sharing L1 cache.
   */
  static inline int thread_pool_size( int depth = 0 );

  /** \brief  The rank of the executing thread in this thread pool */
  KOKKOS_INLINE_FUNCTION static int thread_pool_rank();

  //------------------------------------

  /// \brief Size of the whole thread pool (depth 0) as an unsigned value.
  static inline unsigned max_hardware_threads()
    { return static_cast< unsigned >( thread_pool_size( 0 ) ); }

  /// \brief Thread pool rank of the executing thread as an unsigned value.
  KOKKOS_INLINE_FUNCTION static
  unsigned hardware_thread_id()
    { return static_cast< unsigned >( thread_pool_rank() ); }
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/// \brief Specialization for the OpenMP memory space paired with the OpenMP
///        scratch memory space: access is flagged as permitted
///        (value is true) and both verify() overloads do nothing.
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::OpenMP::memory_space
                                          , Kokkos::OpenMP::scratch_memory_space >
{
  enum { value = true };

  static inline void verify() {}
  static inline void verify( const void * ) {}
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#include <OpenMP/Kokkos_OpenMPexec.hpp>
#include <OpenMP/Kokkos_OpenMP_Parallel.hpp>
/*--------------------------------------------------------------------------*/
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
#endif /* #ifndef KOKKOS_OPENMP_HPP */

View File

@ -1,498 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
/// \file Kokkos_Pair.hpp
/// \brief Declaration and definition of Kokkos::pair.
///
/// This header file declares and defines Kokkos::pair and its related
/// nonmember functions.
#ifndef KOKKOS_PAIR_HPP
#define KOKKOS_PAIR_HPP
#include <Kokkos_Macros.hpp>
#include <utility>
namespace Kokkos {
/// \struct pair
/// \brief Counterpart of std::pair whose core members are device-callable.
///
/// std::pair's member functions (constructors included) are not marked
/// as <tt>__device__</tt>, so they cannot be called on a CUDA device
/// such as an NVIDIA GPU. This struct offers the same interface, with
/// its core members annotated by KOKKOS_FORCEINLINE_FUNCTION so that it
/// can be used on a CUDA device as well as on the host.
template <class T1, class T2>
struct pair
{
  //! Type of the first element (first template parameter).
  typedef T1 first_type;
  //! Type of the second element (second template parameter).
  typedef T2 second_type;

  //! The first element of the pair.
  first_type  first;
  //! The second element of the pair.
  second_type second;

  /// \brief Default constructor: value-initializes both elements.
  ///
  /// Requires T1 and T2 to have accessible default constructors.
  KOKKOS_FORCEINLINE_FUNCTION
  pair()
    : first()
    , second()
  {}

  /// \brief Construct from one value per element.
  ///
  /// Copy-constructs \c first from \c f and \c second from \c s, so T1
  /// and T2 need accessible copy constructors.
  KOKKOS_FORCEINLINE_FUNCTION
  pair( first_type const & f , second_type const & s )
    : first( f )
    , second( s )
  {}

  /// \brief Converting constructor from a Kokkos::pair<U,V>.
  ///
  /// Initializes \c first from \c p.first and \c second from
  /// \c p.second; it will not compile unless those initializations are
  /// valid for T1 and T2.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair( const pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Element-wise assignment from a Kokkos::pair<U,V>.
  ///
  /// Uses the assignment operators of T1 and T2 and returns \c *this.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair<T1, T2> & operator=( const pair<U,V> & p )
  {
    first  = p.first;
    second = p.second;
    return *this;
  }

  /// \brief Converting constructor from a std::pair<U,V>.
  ///
  /// Unlike the constructors above, this one carries no
  /// KOKKOS_FORCEINLINE_FUNCTION annotation.
  template <class U, class V>
  pair( const std::pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Return the std::pair version of this object.
  ///
  /// This is <i>not</i> a device function; do not call it on a CUDA
  /// device. It is intended for host code that wants a std::pair
  /// instead of a Kokkos::pair.
  ///
  /// \note This is deliberately a named member and not a conversion
  ///   operator: a conversion operator made the relational operators
  ///   ambiguous.
  std::pair<T1,T2> to_std_pair() const
  {
    return std::make_pair( first , second );
  }
};
/// \brief Specialization of Kokkos::pair in which both elements are references.
///
/// \c first and \c second are reference members: the constructors bind
/// them to existing objects (nothing is copied) and assignment writes
/// through them. No default constructor is declared.
template <class T1, class T2>
struct pair<T1&, T2&>
{
  //! Type of the first element: a reference to T1.
  typedef T1& first_type;
  //! Type of the second element: a reference to T2.
  typedef T2& second_type;

  //! Reference to the first element.
  first_type  first;
  //! Reference to the second element.
  second_type second;

  /// \brief Bind \c first to \c f and \c second to \c s.
  KOKKOS_FORCEINLINE_FUNCTION
  pair( first_type f , second_type s )
    : first( f )
    , second( s )
  {}

  /// \brief Converting constructor from a Kokkos::pair<U,V>.
  ///
  /// Binds the reference members to \c p.first and \c p.second; it
  /// compiles only if those expressions can bind to T1& and T2&.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair( const pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Converting constructor from a std::pair<U,V>.
  ///
  /// Carries no KOKKOS_FORCEINLINE_FUNCTION annotation.
  template <class U, class V>
  pair( const std::pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Element-wise assignment from a Kokkos::pair<U,V>.
  ///
  /// Assigns \c p.first and \c p.second to the objects that \c first
  /// and \c second refer to, then returns \c *this.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair<first_type, second_type> & operator=( const pair<U,V> & p )
  {
    first  = p.first;
    second = p.second;
    return *this;
  }

  /// \brief Return a std::pair<T1,T2> built from the referenced values.
  ///
  /// This is <i>not</i> a device function; do not call it on a CUDA
  /// device. It is intended for host code that wants a std::pair.
  ///
  /// \note This is deliberately a named member and not a conversion
  ///   operator: a conversion operator made the relational operators
  ///   ambiguous.
  std::pair<T1,T2> to_std_pair() const
  {
    return std::make_pair( first , second );
  }
};
/// \brief Specialization of Kokkos::pair whose second element is a reference.
///
/// \c first is stored by value; \c second is a reference member that is
/// bound on construction and written through on assignment. No default
/// constructor is declared.
template <class T1, class T2>
struct pair<T1, T2&>
{
  //! Type of the first element (stored by value).
  typedef T1  first_type;
  //! Type of the second element: a reference to T2.
  typedef T2& second_type;

  //! The first element of the pair.
  first_type  first;
  //! Reference to the second element.
  second_type second;

  /// \brief Copy \c f into \c first and bind \c second to \c s.
  KOKKOS_FORCEINLINE_FUNCTION
  pair( first_type const & f , second_type s )
    : first( f )
    , second( s )
  {}

  /// \brief Converting constructor from a Kokkos::pair<U,V>.
  ///
  /// Initializes \c first from \c p.first and binds \c second to
  /// \c p.second; it compiles only if both initializations are valid.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair( const pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Converting constructor from a std::pair<U,V>.
  ///
  /// Carries no KOKKOS_FORCEINLINE_FUNCTION annotation.
  template <class U, class V>
  pair( const std::pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Element-wise assignment from a Kokkos::pair<U,V>.
  ///
  /// Assigns to \c first and, through the reference, to the object that
  /// \c second refers to; returns \c *this.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair<first_type, second_type> & operator=( const pair<U,V> & p )
  {
    first  = p.first;
    second = p.second;
    return *this;
  }

  /// \brief Return a std::pair<T1,T2> built from the current values.
  ///
  /// This is <i>not</i> a device function; do not call it on a CUDA
  /// device. It is intended for host code that wants a std::pair.
  ///
  /// \note This is deliberately a named member and not a conversion
  ///   operator: a conversion operator made the relational operators
  ///   ambiguous.
  std::pair<T1,T2> to_std_pair() const
  {
    return std::make_pair( first , second );
  }
};
/// \brief Specialization of Kokkos::pair whose first element is a reference.
///
/// \c first is a reference member that is bound on construction and
/// written through on assignment; \c second is stored by value. No
/// default constructor is declared.
template <class T1, class T2>
struct pair<T1&, T2>
{
  //! Type of the first element: a reference to T1.
  typedef T1& first_type;
  //! Type of the second element (stored by value).
  typedef T2  second_type;

  //! Reference to the first element.
  first_type  first;
  //! The second element of the pair.
  second_type second;

  /// \brief Bind \c first to \c f and copy \c s into \c second.
  KOKKOS_FORCEINLINE_FUNCTION
  pair( first_type f , second_type const & s )
    : first( f )
    , second( s )
  {}

  /// \brief Converting constructor from a Kokkos::pair<U,V>.
  ///
  /// Binds \c first to \c p.first and initializes \c second from
  /// \c p.second; it compiles only if both initializations are valid.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair( const pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Converting constructor from a std::pair<U,V>.
  ///
  /// Carries no KOKKOS_FORCEINLINE_FUNCTION annotation.
  template <class U, class V>
  pair( const std::pair<U,V> & p )
    : first( p.first )
    , second( p.second )
  {}

  /// \brief Element-wise assignment from a Kokkos::pair<U,V>.
  ///
  /// Assigns, through the reference, to the object that \c first refers
  /// to and to \c second; returns \c *this.
  template <class U, class V>
  KOKKOS_FORCEINLINE_FUNCTION
  pair<first_type, second_type> & operator=( const pair<U,V> & p )
  {
    first  = p.first;
    second = p.second;
    return *this;
  }

  /// \brief Return a std::pair<T1,T2> built from the current values.
  ///
  /// This is <i>not</i> a device function; do not call it on a CUDA
  /// device. It is intended for host code that wants a std::pair.
  ///
  /// \note This is deliberately a named member and not a conversion
  ///   operator: a conversion operator made the relational operators
  ///   ambiguous.
  std::pair<T1,T2> to_std_pair() const
  {
    return std::make_pair( first , second );
  }
};
/// \brief Equality operator for Kokkos::pair.
///
/// True when both the \c first elements and the \c second elements
/// compare equal with <tt>operator==</tt>.
template <class T1, class T2>
KOKKOS_FORCEINLINE_FUNCTION
bool operator== ( const pair<T1,T2> & lhs , const pair<T1,T2> & rhs )
{
  return ( lhs.first == rhs.first ) && ( lhs.second == rhs.second );
}
/// \brief Inequality operator for Kokkos::pair.
///
/// Defined as the negation of <tt>lhs == rhs</tt>.
template <class T1, class T2>
KOKKOS_FORCEINLINE_FUNCTION
bool operator!= ( const pair<T1,T2> & lhs , const pair<T1,T2> & rhs )
{
  return ! ( lhs == rhs );
}
/// \brief Less-than operator for Kokkos::pair.
///
/// Lexicographic comparison built only from <tt>operator<</tt> of the
/// elements: \c first decides unless neither \c first is less than the
/// other, in which case \c second decides.
template <class T1, class T2>
KOKKOS_FORCEINLINE_FUNCTION
bool operator< ( const pair<T1,T2> & lhs , const pair<T1,T2> & rhs )
{
  return ( lhs.first < rhs.first )
      || ( ! ( rhs.first < lhs.first ) && ( lhs.second < rhs.second ) );
}
/// \brief Less-than-or-equal-to operator for Kokkos::pair.
///
/// Defined as the negation of <tt>rhs < lhs</tt>.
template <class T1, class T2>
KOKKOS_FORCEINLINE_FUNCTION
bool operator<= ( const pair<T1,T2> & lhs , const pair<T1,T2> & rhs )
{
  return ! ( rhs < lhs );
}
/// \brief Greater-than operator for Kokkos::pair.
///
/// Defined as <tt>rhs < lhs</tt>.
template <class T1, class T2>
KOKKOS_FORCEINLINE_FUNCTION
bool operator> ( const pair<T1,T2> & lhs , const pair<T1,T2> & rhs )
{
  return rhs < lhs;
}
/// \brief Greater-than-or-equal-to operator for Kokkos::pair.
///
/// Defined as the negation of <tt>lhs < rhs</tt>.
template <class T1, class T2>
KOKKOS_FORCEINLINE_FUNCTION
bool operator>= ( const pair<T1,T2> & lhs , const pair<T1,T2> & rhs )
{
  return ! ( lhs < rhs );
}
/// \brief Build a Kokkos::pair from two values.
///
/// "Nonmember constructor" for Kokkos::pair, used like std::make_pair:
/// the element types are deduced from the arguments, which are taken by
/// value and passed on to the two-argument pair constructor.
template <class T1,class T2>
KOKKOS_FORCEINLINE_FUNCTION
pair<T1,T2> make_pair ( T1 x , T2 y )
{
  return pair<T1,T2>( x , y );
}
/// \brief Return a pair of references to the input arguments.
///
/// This compares to std::tie (new in C++11). You can use it to
/// assign to two variables at once, from the result of a function
/// that returns a pair. For example (<tt>__device__</tt> and
/// <tt>__host__</tt> attributes omitted for brevity):
/// \code
/// // Declaration of the function to call.
/// // First return value: operation count.
/// // Second return value: whether all operations succeeded.
/// Kokkos::pair<int, bool> someFunction ();
///
/// // Code that uses Kokkos::tie.
/// int myFunction () {
/// int count = 0;
/// bool success = false;
///
/// // This assigns to both count and success.
/// Kokkos::tie (count, success) = someFunction ();
///
/// if (! success) {
/// // ... Some operation failed;
/// // take corrective action ...
/// }
/// return count;
/// }
/// \endcode
///
/// The line that uses tie() could have been written like this:
/// \code
/// Kokkos::pair<int, bool> result = someFunction ();
/// count = result.first;
/// success = result.second;
/// \endcode
///
/// Using tie() saves two lines of code and avoids a copy of each
/// element of the pair. The latter could be significant if one or
/// both elements of the pair are more substantial objects than \c int
/// or \c bool.
template <class T1,class T2>
KOKKOS_FORCEINLINE_FUNCTION
pair<T1 &,T2 &> tie ( T1 & x , T2 & y )
{
  // The returned pair holds references to x and y, so assigning to it
  // writes through to the caller's variables.
  return pair<T1 &,T2 &>( x , y );
}
//
// Specialization of Kokkos::pair for a \c void second argument. This
// is not actually a "pair": the only stored element is \c first.
//
template <class T1>
struct pair<T1,void>
{
  //! Type of the only stored element.
  typedef T1   first_type;
  //! Always \c void for this specialization.
  typedef void second_type;

  //! The stored element.
  first_type first;

  // Keeps the name \c second available; it is an enumerator with value
  // 0, not a data member.
  enum { second = 0 };

  //! Default constructor: value-initializes \c first.
  KOKKOS_FORCEINLINE_FUNCTION
  pair()
    : first()
  {}

  //! Construct from a value for \c first.
  KOKKOS_FORCEINLINE_FUNCTION
  pair( const first_type & f )
    : first( f )
  {}

  //! Two-argument form; the \c int argument is ignored.
  KOKKOS_FORCEINLINE_FUNCTION
  pair( const first_type & f , int )
    : first( f )
  {}

  //! Converting constructor from a Kokkos::pair<U,void>.
  template <class U>
  KOKKOS_FORCEINLINE_FUNCTION
  pair( const pair<U,void> & p )
    : first( p.first )
  {}

  //! Assign \c first from a Kokkos::pair<U,void>; returns \c *this.
  template <class U>
  KOKKOS_FORCEINLINE_FUNCTION
  pair<T1, void> & operator=( const pair<U,void> & p )
  {
    first = p.first;
    return *this;
  }
};
//
// Specialization of relational operators for Kokkos::pair<T1,void>.
//
//! Equality for Kokkos::pair<T1,void>: compares the \c first elements only.
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION
bool operator== ( const pair<T1,void> & lhs , const pair<T1,void> & rhs )
{
  return lhs.first == rhs.first;
}
//! Inequality for Kokkos::pair<T1,void>: the negation of <tt>lhs == rhs</tt>.
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION
bool operator!= ( const pair<T1,void> & lhs , const pair<T1,void> & rhs )
{
  return ! ( lhs == rhs );
}
//! Less-than for Kokkos::pair<T1,void>: compares the \c first elements only.
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION
bool operator< ( const pair<T1,void> & lhs , const pair<T1,void> & rhs )
{
  return lhs.first < rhs.first;
}
//! Less-than-or-equal-to for Kokkos::pair<T1,void>: the negation of <tt>rhs < lhs</tt>.
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION
bool operator<= ( const pair<T1,void> & lhs , const pair<T1,void> & rhs )
{
  return ! ( rhs < lhs );
}
//! Greater-than for Kokkos::pair<T1,void>: defined as <tt>rhs < lhs</tt>.
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION
bool operator> ( const pair<T1,void> & lhs , const pair<T1,void> & rhs )
{
  return rhs < lhs;
}
//! Greater-than-or-equal-to for Kokkos::pair<T1,void>: the negation of <tt>lhs < rhs</tt>.
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION
bool operator>= ( const pair<T1,void> & lhs , const pair<T1,void> & rhs )
{
  return ! ( lhs < rhs );
}
} // namespace Kokkos
#endif //KOKKOS_PAIR_HPP

View File

@ -1,908 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Parallel.hpp
/// \brief Declaration of parallel operators
#ifndef KOKKOS_PARALLEL_HPP
#define KOKKOS_PARALLEL_HPP
#include <cstddef>
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_View.hpp>
#include <Kokkos_ExecPolicy.hpp>
#ifdef KOKKOSP_ENABLE_PROFILING
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <typeinfo>
#endif
#include <impl/Kokkos_AllocationTracker.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#ifdef KOKKOS_HAVE_DEBUG
#include<iostream>
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Given a Functor and Execution Policy query an execution space.
*
* if the Policy has an execution space use that
* else if the Functor has an execution_space use that
* else if the Functor has a device_type use that for backward compatibility
* else use the default
*/
// Primary template: the fallback used when none of the partial
// specializations is selected. It reports Kokkos::DefaultExecutionSpace.
// The two trailing parameters default to void and are the slots the
// partial specializations fill with enable_if_type<...>::type.
template< class Functor
, class Policy
, class EnableFunctor = void
, class EnablePolicy = void
>
struct FunctorPolicyExecutionSpace {
typedef Kokkos::DefaultExecutionSpace execution_space ;
};
// Partial specialization keyed on both Functor::device_type and
// Policy::execution_space; it reports Policy::execution_space.
// NOTE(review): enable_if_type is defined elsewhere; presumably its ::type
// is only well-formed when the nested typedef exists (SFINAE) - confirm.
// NOTE(review): appears to exist to disambiguate between the policy-only
// and functor-only specializations when both typedefs are present - confirm.
template< class Functor , class Policy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::device_type >::type
, typename enable_if_type< typename Policy ::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
// Partial specialization keyed on both Functor::execution_space and
// Policy::execution_space; it reports Policy::execution_space.
// NOTE(review): enable_if_type is defined elsewhere; presumably its ::type
// is only well-formed when the nested typedef exists (SFINAE) - confirm.
template< class Functor , class Policy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::execution_space >::type
, typename enable_if_type< typename Policy ::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
// Partial specialization keyed on Policy::execution_space only; the functor
// slot (EnableFunctor) is left unconstrained. It reports
// Policy::execution_space.
// NOTE(review): enable_if_type is defined elsewhere; presumably its ::type
// is only well-formed when the nested typedef exists (SFINAE) - confirm.
template< class Functor , class Policy , class EnableFunctor >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, EnableFunctor
, typename enable_if_type< typename Policy::execution_space >::type
>
{
typedef typename Policy ::execution_space execution_space ;
};
// Partial specialization keyed on Functor::device_type only; the policy
// slot (EnablePolicy) is left unconstrained. It reports
// Functor::device_type as the execution space (the "backward
// compatibility" case named in the description of this trait).
// NOTE(review): enable_if_type is defined elsewhere; presumably its ::type
// is only well-formed when the nested typedef exists (SFINAE) - confirm.
template< class Functor , class Policy , class EnablePolicy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::device_type >::type
, EnablePolicy
>
{
typedef typename Functor::device_type execution_space ;
};
// Partial specialization keyed on Functor::execution_space only; the policy
// slot (EnablePolicy) is left unconstrained. It reports
// Functor::execution_space.
// NOTE(review): enable_if_type is defined elsewhere; presumably its ::type
// is only well-formed when the nested typedef exists (SFINAE) - confirm.
template< class Functor , class Policy , class EnablePolicy >
struct FunctorPolicyExecutionSpace
< Functor , Policy
, typename enable_if_type< typename Functor::execution_space >::type
, EnablePolicy
>
{
typedef typename Functor::execution_space execution_space ;
};
//----------------------------------------------------------------------------
/// \class ParallelFor
/// \brief Implementation of the ParallelFor operator that has a
/// partial specialization for the device.
///
/// This is an implementation detail of parallel_for. Users should
/// skip this and go directly to the nonmember function parallel_for.
///
/// \tparam FunctorType Type of the functor handed to parallel_for.
/// \tparam ExecPolicy  Type of the execution policy handed to parallel_for.
///
/// Only declared here; the definitions are provided elsewhere.
template< class FunctorType , class ExecPolicy > class ParallelFor ;
/// \class ParallelReduce
/// \brief Implementation detail of parallel_reduce.
///
/// This is an implementation detail of parallel_reduce. Users should
/// skip this and go directly to the nonmember function parallel_reduce.
///
/// \tparam FunctorType Type of the functor handed to parallel_reduce.
/// \tparam ExecPolicy  Type of the execution policy handed to parallel_reduce.
///
/// Only declared here; the definitions are provided elsewhere.
template< class FunctorType , class ExecPolicy > class ParallelReduce ;
/// \class ParallelScan
/// \brief Implementation detail of parallel_scan.
///
/// This is an implementation detail of parallel_scan. Users should
/// skip this and go directly to the documentation of the nonmember
/// template function Kokkos::parallel_scan.
///
/// \tparam FunctorType Type of the scan functor.
/// \tparam ExecPolicy  Type of the execution policy.
///
/// Only declared here; the definitions are provided elsewhere.
template< class FunctorType , class ExecPolicy > class ParallelScan ;
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/** \brief Execute \c functor in parallel according to the execution \c policy.
*
* A "functor" is a class containing the function to execute in parallel,
* data needed for that execution, and an optional \c execution_space
* typedef. Here is an example functor for parallel_for:
*
* \code
* class FunctorType {
* public:
* typedef ... execution_space ;
* void operator() ( WorkType iwork ) const ;
* };
* \endcode
*
* In the above example, \c WorkType is any integer type for which a
* valid conversion from \c size_t to \c WorkType exists. The functor's
* <tt>operator()</tt> method defines the operation to parallelize,
* over the range of integer indices <tt>iwork=[0,work_count-1]</tt>.
* This compares to a single iteration \c iwork of a \c for loop.
* If \c execution_space is not defined DefaultExecutionSpace will be used.
*/
template< class ExecPolicy , class FunctorType >
inline
void parallel_for( const ExecPolicy & policy
, const FunctorType & functor
, const std::string& str = ""
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
)
{
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelFor< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelFor(kpID);
}
#endif
}
template< class FunctorType >
inline
void parallel_for( const size_t work_count
, const FunctorType & functor
, const std::string& str = ""
)
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > policy ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelFor< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelFor(kpID);
}
#endif
}
/// \brief Label-first convenience overload of parallel_for.
///
/// Forwards to <tt>parallel_for(policy,functor,str)</tt>. When
/// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to nonzero, the call
/// is bracketed by Kokkos::fence() and a start/end message on std::cout.
template< class ExecPolicy , class FunctorType >
inline
void parallel_for( const std::string & str
                 , const ExecPolicy & policy
                 , const FunctorType & functor )
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
#endif

  parallel_for( policy , functor , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
#endif

  (void) str;
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief Parallel reduction
*
* Example of a parallel_reduce functor for a POD (plain old data) value type:
* \code
* class FunctorType { // For POD value type
* public:
* typedef ... execution_space ;
* typedef <podType> value_type ;
* void operator()( <intType> iwork , <podType> & update ) const ;
* void init( <podType> & update ) const ;
* void join( volatile <podType> & update ,
* volatile const <podType> & input ) const ;
*
* typedef true_type has_final ;
* void final( <podType> & update ) const ;
* };
* \endcode
*
* Example of a parallel_reduce functor for an array of POD (plain old data) values:
* \code
* class FunctorType { // For array of POD value
* public:
* typedef ... execution_space ;
* typedef <podType> value_type[] ;
* void operator()( <intType> , <podType> update[] ) const ;
* void init( <podType> update[] ) const ;
* void join( volatile <podType> update[] ,
* volatile const <podType> input[] ) const ;
*
* typedef true_type has_final ;
* void final( <podType> update[] ) const ;
* };
* \endcode
*/
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, const std::string& str = ""
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
)
{
// typedef typename
// Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
// execution_space ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelReduce< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , result_view );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// integral range policy
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, const std::string& str = ""
)
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > policy ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , result_view );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// general policy and view ouput
template< class ExecPolicy , class FunctorType , class ViewType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
, const ViewType & result_view
, const std::string& str = ""
, typename Impl::enable_if<
( Impl::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value
#ifdef KOKKOS_HAVE_CUDA
&& ! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value
#endif
)>::type * = 0 )
{
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// general policy and pod or array of pod output
//
// Reduces over \c policy and delivers the result through \c result_ref,
// whose type is the functor's FunctorValueTraits reference_type. That
// parameter type is formed through Impl::enable_if, so this overload is
// only viable when ExecPolicy is not integral (and, with KOKKOS_HAVE_CUDA
// defined, when the policy's execution space is not Kokkos::Cuda).
//
// Declared inline like every other parallel_reduce overload in this header.
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const ExecPolicy & policy
, const FunctorType & functor
#ifdef KOKKOS_HAVE_CUDA
, typename Impl::enable_if<
( ! Impl::is_integral< ExecPolicy >::value &&
! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value )
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type>::type result_ref
, const std::string& str = ""
, typename Impl::enable_if<! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value >::type* = 0
)
#else
, typename Impl::enable_if<
( ! Impl::is_integral< ExecPolicy >::value)
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type
>::type result_ref
, const std::string& str = ""
)
#endif
{
  typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
  typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ;

  // Wrap the result output request in a view to inform the implementation
  // of the type and memory space. The view's value type is the functor's
  // value_type when StaticValueSize is nonzero, its pointer_type otherwise.
  typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
                                     , typename ValueTraits::value_type
                                     , typename ValueTraits::pointer_type
                                     >::type value_type ;

  // Unmanaged HostSpace view built from the pointer that ValueOps derives
  // from result_ref and the value count that ValueTraits reports.
  Kokkos::View< value_type
              , HostSpace
              , Kokkos::MemoryUnmanaged
              >
    result_view( ValueOps::pointer( result_ref )
               , ValueTraits::value_count( functor )
               );

#ifdef KOKKOSP_ENABLE_PROFILING
  // Announce the kernel to the profiling library if one is loaded. An
  // empty label is replaced by the functor's typeid name.
  uint64_t kpID = 0;
  if(Kokkos::Experimental::profileLibraryLoaded()) {
    Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
  }
#endif

  (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) );

#ifdef KOKKOSP_ENABLE_PROFILING
  if(Kokkos::Experimental::profileLibraryLoaded()) {
    Kokkos::Experimental::endParallelReduce(kpID);
  }
#endif
}
// integral range policy and view ouput
template< class FunctorType , class ViewType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, const ViewType & result_view
, const std::string& str = ""
, typename Impl::enable_if<( Impl::is_view<ViewType>::value
#ifdef KOKKOS_HAVE_CUDA
&& ! Impl::is_same<
typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
Kokkos::Cuda>::value
#endif
)>::type * = 0 )
{
typedef typename
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef RangePolicy< execution_space > ExecPolicy ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , ExecPolicy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
// integral range policy and pod or array of pod output
template< class FunctorType >
inline
void parallel_reduce( const size_t work_count
, const FunctorType & functor
, typename Kokkos::Impl::FunctorValueTraits<
typename Impl::if_c<Impl::is_execution_policy<FunctorType>::value ||
Impl::is_integral<FunctorType>::value,
void,FunctorType>::type
, void >::reference_type result
, const std::string& str = ""
, typename Impl::enable_if< true
#ifdef KOKKOS_HAVE_CUDA
&& ! Impl::is_same<
typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
Kokkos::Cuda>::value
#endif
>::type * = 0 )
{
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ;
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
// Wrap the result output request in a view to inform the implementation
// of the type and memory space.
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
, typename ValueTraits::value_type
, typename ValueTraits::pointer_type
>::type value_type ;
Kokkos::View< value_type
, HostSpace
, Kokkos::MemoryUnmanaged
>
result_view( ValueOps::pointer( result )
, ValueTraits::value_count( functor )
);
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelReduce(kpID);
}
#endif
}
/// \brief Label-first convenience overload of parallel_reduce taking the
///   result as a pointer.
///
/// Forwards to <tt>parallel_reduce(policy,functor,result,str)</tt>. When
/// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to nonzero, the call
/// is bracketed by Kokkos::fence() and a start/end message on std::cout.
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
                    , const ExecPolicy & policy
                    , const FunctorType & functor
                    , ResultType * result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif

  parallel_reduce( policy , functor , result , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif

  (void) str;
}
/// \brief Label-first convenience overload of parallel_reduce taking the
///   result as a reference.
///
/// Forwards to <tt>parallel_reduce(policy,functor,result,str)</tt>. When
/// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to nonzero, the call
/// is bracketed by Kokkos::fence() and a start/end message on std::cout.
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
                    , const ExecPolicy & policy
                    , const FunctorType & functor
                    , ResultType & result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif

  parallel_reduce( policy , functor , result , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif

  (void) str;
}
/// \brief Label-first parallel_reduce overload without a result argument.
///
/// Forwards to the parallel_reduce overload accepting (policy, functor, str).
/// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, the
/// forwarded call is bracketed by Kokkos::fence() and by "Start"/"End"
/// messages on std::cout that include \c str.
///
/// \param str     [in] Kernel label; forwarded and used in the debug output.
/// \param policy  [in] Forwarded unchanged as the first argument.
/// \param functor [in] Forwarded unchanged.
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const std::string & str
, const ExecPolicy & policy
, const FunctorType & functor)
{
// Optional debug trace: fence first, then announce the kernel.
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif
// The actual work is done by the (policy, functor, str) overload.
parallel_reduce(policy,functor,str);
// Optional debug trace: fence again before reporting the end of the kernel.
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif
// Explicitly marks str as used (it is already consumed by the call above).
(void) str;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/// \fn parallel_scan
/// \tparam ExecutionPolicy The execution policy type.
/// \tparam FunctorType The scan functor type.
///
/// \param policy [in] The execution policy.
/// \param functor [in] The scan functor.
///
/// This function implements a parallel scan pattern. The scan can
/// be either inclusive or exclusive, depending on how you implement
/// the scan functor.
///
/// A scan functor looks almost exactly like a reduce functor, except
/// that its operator() takes a third \c bool argument, \c final_pass,
/// which indicates whether this is the last pass of the scan
/// operation. We will show below how to use the \c final_pass
/// argument to control whether the scan is inclusive or exclusive.
///
/// Here is the minimum required interface of a scan functor for a POD
/// (plain old data) value type \c PodType. That is, the result is a
/// View of zero or more PodType. It is also possible for the result
/// to be an array of (same-sized) arrays of PodType, but we do not
/// show the required interface for that here.
/// \code
/// template< class ExecPolicy , class FunctorType >
/// class ScanFunctor {
/// public:
/// // The Kokkos device type
/// typedef ... execution_space;
/// // Type of an entry of the array containing the result;
/// // also the type of each of the entries combined using
/// // operator() or join().
/// typedef PodType value_type;
///
/// void operator () (const ExecPolicy::member_type & i, value_type& update, const bool final_pass) const;
/// void init (value_type& update) const;
/// void join (volatile value_type& update, volatile const value_type& input) const;
/// };
/// \endcode
///
/// Here is an example of a functor which computes an inclusive plus-scan
/// of an array of \c int, in place. If given an array [1, 2, 3, 4], this
/// scan will overwrite that array with [1, 3, 6, 10].
///
/// \code
/// template<class SpaceType>
/// class InclScanFunctor {
/// public:
/// typedef SpaceType execution_space;
/// typedef int value_type;
/// typedef typename SpaceType::size_type size_type;
///
/// InclScanFunctor( Kokkos::View<value_type*, execution_space> x
/// , Kokkos::View<value_type*, execution_space> y ) : m_x(x), m_y(y) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// update += m_x(i);
/// if (final_pass) {
/// m_y(i) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, execution_space> m_x;
/// Kokkos::View<value_type*, execution_space> m_y;
/// };
/// \endcode
///
/// Here is an example of a functor which computes an <i>exclusive</i>
/// scan of an array of \c int, in place. In operator(), note both
/// that the final_pass test and the update have switched places, and
/// the use of a temporary. If given an array [1, 2, 3, 4], this scan
/// will overwrite that array with [0, 1, 3, 6].
///
/// \code
/// template<class SpaceType>
/// class ExclScanFunctor {
/// public:
/// typedef SpaceType execution_space;
/// typedef int value_type;
/// typedef typename SpaceType::size_type size_type;
///
/// ExclScanFunctor (Kokkos::View<value_type*, execution_space> x) : x_ (x) {}
///
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
/// const value_type x_i = x_(i);
/// if (final_pass) {
/// x_(i) = update;
/// }
/// update += x_i;
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, execution_space> x_;
/// };
/// \endcode
///
/// Here is an example of a functor which builds on the above
/// exclusive scan example, to compute an offsets array from a
/// population count array, in place. We assume that the pop count
/// array has an extra entry at the end to store the final count. If
/// given an array [1, 2, 3, 4, 0], this scan will overwrite that
/// array with [0, 1, 3, 6, 10].
///
/// \code
/// template<class SpaceType>
/// class OffsetScanFunctor {
/// public:
/// typedef SpaceType execution_space;
/// typedef int value_type;
/// typedef typename SpaceType::size_type size_type;
///
/// // last_index_ is the last valid index (zero-based) of x.
/// // If x has length zero, then last_index_ won't be used anyway.
/// OffsetScanFunctor( Kokkos::View<value_type*, execution_space> x
/// , Kokkos::View<value_type*, execution_space> y )
/// : m_x(x), m_y(y), last_index_ (x.dimension_0 () == 0 ? 0 : x.dimension_0 () - 1)
/// {}
///
/// void operator () (const size_type i, int& update, const bool final_pass) const {
/// if (final_pass) {
/// m_y(i) = update;
/// }
/// update += m_x(i);
/// // The last entry of m_y gets the final sum.
/// if (final_pass && i == last_index_) {
/// m_y(i+1) = update;
/// }
/// }
/// void init (value_type& update) const {
/// update = 0;
/// }
/// void join (volatile value_type& update, volatile const value_type& input) const {
/// update += input;
/// }
///
/// private:
/// Kokkos::View<value_type*, execution_space> m_x;
/// Kokkos::View<value_type*, execution_space> m_y;
/// const size_type last_index_;
/// };
/// \endcode
///
/// \param str [in] Optional kernel label. It is only read when
///   KOKKOSP_ENABLE_PROFILING is defined; an empty label is then replaced
///   by typeid(FunctorType).name() in the profiling callback.
///
/// The trailing unnamed pointer parameter exists only to disable this
/// overload when \c ExecutionPolicy is an integral type.
template< class ExecutionPolicy , class FunctorType >
inline
void parallel_scan( const ExecutionPolicy & policy
                  , const FunctorType     & functor
                  , const std::string& str = ""
                  , typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0
                  )
{
#ifdef KOKKOSP_ENABLE_PROFILING
  // Kernel identifier filled in by the profiling hook; stays 0 if no
  // profiling library is loaded.
  uint64_t kernel_id = 0;
  if ( Kokkos::Experimental::profileLibraryLoaded() ) {
    Kokkos::Experimental::beginParallelScan( str.empty() ? typeid(FunctorType).name() : str , 0 , &kernel_id );
  }
#endif

  // The scan is dispatched by constructing the closure; the object is not
  // used afterwards and lives until the end of this function.
  Impl::ParallelScan< FunctorType , ExecutionPolicy >
    closure( Impl::CopyWithoutTracking::apply(functor) , policy );

#ifdef KOKKOSP_ENABLE_PROFILING
  if ( Kokkos::Experimental::profileLibraryLoaded() ) {
    Kokkos::Experimental::endParallelScan( kernel_id );
  }
#endif
}
template< class FunctorType >
inline
void parallel_scan( const size_t work_count
, const FunctorType & functor
, const std::string& str = "" )
{
typedef typename
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
execution_space ;
typedef Kokkos::RangePolicy< execution_space > policy ;
#ifdef KOKKOSP_ENABLE_PROFILING
uint64_t kpID = 0;
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
}
#endif
(void) Impl::ParallelScan< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) );
#ifdef KOKKOSP_ENABLE_PROFILING
if(Kokkos::Experimental::profileLibraryLoaded()) {
Kokkos::Experimental::endParallelScan(kpID);
}
#endif
}
/// \brief Label-first parallel_scan overload.
///
/// Forwards to the parallel_scan overload accepting (policy, functor, str).
/// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, the
/// forwarded call is bracketed by Kokkos::fence() and by "Start"/"End"
/// messages on std::cout that include \c str.
///
/// \param str     [in] Kernel label; forwarded and used in the debug output.
/// \param policy  [in] Forwarded unchanged as the first argument.
/// \param functor [in] Forwarded unchanged.
template< class ExecutionPolicy , class FunctorType >
inline
void parallel_scan( const std::string& str
, const ExecutionPolicy & policy
, const FunctorType & functor)
{
// Optional debug trace: fence first, then announce the kernel.
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
#endif
// The actual work is done by the (policy, functor, str) overload.
parallel_scan(policy,functor,str);
// Optional debug trace: fence again before reporting the end of the kernel.
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
Kokkos::fence();
std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
#endif
// Explicitly marks str as used (it is already consumed by the call above).
(void) str;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// \brief Query for a functor's team shared-memory size request.
///
/// Primary template: used when none of the partial specializations is
/// selected. value() ignores both arguments and returns 0.
///
/// \tparam FunctorType The functor type being queried.
/// \tparam Enable      Selector for the partial specializations; defaults to void.
template< class FunctorType , class Enable = void >
struct FunctorTeamShmemSize
{
static inline size_t value( const FunctorType & , int ) { return 0 ; }
};
/// \brief Specialization for functors that have a \c team_shmem_size member.
///
/// The enable_if condition is only well-formed when
/// <tt>& FunctorType::team_shmem_size</tt> is a valid expression, so this
/// specialization is considered only for such functors. value() returns
/// <tt>f.team_shmem_size( team_size )</tt>.
///
/// NOTE(review): a functor that also declares \c shmem_size would appear to
/// match the sibling specialization as well -- confirm whether that case is
/// expected to be ambiguous.
template< class FunctorType >
struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type >
{
static inline size_t value( const FunctorType & f , int team_size ) { return f.team_shmem_size( team_size ) ; }
};
/// \brief Specialization for functors that have a \c shmem_size member.
///
/// The enable_if condition is only well-formed when
/// <tt>& FunctorType::shmem_size</tt> is a valid expression, so this
/// specialization is considered only for such functors. value() returns
/// <tt>f.shmem_size( team_size )</tt>.
///
/// NOTE(review): a functor that also declares \c team_shmem_size would appear
/// to match the sibling specialization as well -- confirm whether that case
/// is expected to be ambiguous.
template< class FunctorType >
struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
{
static inline size_t value( const FunctorType & f , int team_size ) { return f.shmem_size( team_size ) ; }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* KOKKOS_PARALLEL_HPP */

View File

@ -1,165 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_QTHREAD_HPP
#define KOKKOS_QTHREAD_HPP
#include <cstddef>
#include <iosfwd>
#include <Kokkos_Core.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ExecPolicy.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
class QthreadExec ;
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Execution space supported by Qthread.
 *
 *  This class only declares the interface; the member functions are
 *  defined elsewhere.
 */
class Qthread {
public:
//! \name Type declarations that all Kokkos devices must provide.
//@{
//! Tag this class as an execution space
typedef Qthread execution_space ;
//! Memory space of this execution space: Kokkos::HostSpace.
typedef Kokkos::HostSpace memory_space ;
//! This execution space's preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! Array layout of this execution space: Kokkos::LayoutRight.
typedef Kokkos::LayoutRight array_layout ;
//! Size type, taken from the memory space.
typedef memory_space::size_type size_type ;
//! Scratch memory space type associated with this execution space.
typedef ScratchMemorySpace< Qthread > scratch_memory_space ;
//@}
/*------------------------------------------------------------------------*/
/** \brief Access an instance of the execution space.
 *
 *  Initialization will construct one or more instances.
 *  NOTE(review): the unnamed int argument (default 0) presumably selects
 *  which instance is returned -- confirm against the implementation.
 */
static Qthread & instance( int = 0 );
/** \brief Set the execution space to a "sleep" state.
*
* This function sets the "sleep" state in which it is not ready for work.
* This may consume fewer resources than the "ready" state,
* but it may also take time to transition to the "ready" state.
*
* Unlike wake() and fence(), this is declared as a non-static member.
*
* \return True if enters or is in the "sleep" state.
* False if functions are currently executing.
*/
bool sleep();
/** \brief Wake from the sleep state.
*
* \return True if enters or is in the "ready" state.
* False if functions are currently executing.
*/
static bool wake();
/** \brief Wait until all dispatched functions have completed.
*
* The parallel_for or parallel_reduce dispatch of a functor may
* return asynchronously, before the functor completes. This
* method does not return until all dispatched functors on this
* device have completed.
*/
static void fence();
/*------------------------------------------------------------------------*/
/** \brief Initialize the execution space.
 *
 *  NOTE(review): thread_count is presumably the number of threads to
 *  use -- confirm against the implementation.
 */
static void initialize( int thread_count );
/** \brief Finalize the execution space. */
static void finalize();
/** \brief Print configuration information to the given output stream. */
static void print_configuration( std::ostream & , const bool detail = false );
/** \brief Shepherd count query; declared here, defined elsewhere. */
int shepherd_size() const ;
/** \brief Per-shepherd worker count query; declared here, defined elsewhere. */
int shepherd_worker_size() const ;
};
/*--------------------------------------------------------------------------*/
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/// \brief Full specialization for the pair
///        ( Kokkos::Qthread::memory_space , Kokkos::Qthread::scratch_memory_space ).
///
/// \c value is true and both verify() overloads have empty bodies, so no
/// check is performed for this pair of spaces.
/// NOTE(review): presumably this states that code running with access to the
/// first space may also access the second -- confirm against the primary
/// template.
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::Qthread::memory_space
, Kokkos::Qthread::scratch_memory_space
>
{
enum { value = true };
// Both overloads are intentionally empty.
inline static void verify( void ) { }
inline static void verify( const void * ) { }
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
#include <Kokkos_Parallel.hpp>
#include <Qthread/Kokkos_QthreadExec.hpp>
#include <Qthread/Kokkos_Qthread_Parallel.hpp>
#endif /* #define KOKKOS_QTHREAD_HPP */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -1,125 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SCRATCHSPACE_HPP
#define KOKKOS_SCRATCHSPACE_HPP
#include <stdio.h>
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Scratch memory space associated with an execution space.
 *
 *  Hands out consecutive chunks of a caller-provided buffer
 *  [ ptr , ptr + size ). Each request is rounded up to a multiple of
 *  ALIGN bytes and the chunks are never returned individually.
 *
 *  \tparam ExecSpace Execution space supplying \c array_layout and
 *                    \c size_type.
 */
template< class ExecSpace >
class ScratchMemorySpace {
public:

  // Alignment of memory chunks returned by 'get'
  // must be a power of two
  enum { ALIGN = 8 };

private:

  mutable char * m_iter ; // Next unassigned byte; advanced by get_shmem().
  char *         m_end ;  // End of the buffer (ptr + size from the constructor).

  // Declared private and not defined in this class: no default
  // construction and no assignment from outside the class.
  ScratchMemorySpace();
  ScratchMemorySpace & operator = ( const ScratchMemorySpace & );

  enum { MASK = ALIGN - 1 }; // Alignment used by View::shmem_size

public:

  //! Tag this class as a memory space
  typedef ScratchMemorySpace                memory_space ;
  typedef ExecSpace                         execution_space ;
  //! This execution space's preferred device_type
  typedef Kokkos::Device<execution_space,memory_space> device_type;

  typedef typename ExecSpace::array_layout  array_layout ;
  typedef typename ExecSpace::size_type     size_type ;

  /** \brief Round \c size up to the next multiple of ALIGN. */
  template< typename IntType >
  KOKKOS_INLINE_FUNCTION static
  IntType align( const IntType & size )
    { return ( size + MASK ) & ~MASK ; }

  /** \brief Reserve \c size bytes (rounded up by align()) from the buffer.
   *
   *  \return Pointer to the start of the reserved chunk, or NULL if the
   *          rounded-up request exceeds the remaining capacity. On
   *          failure the internal position is left unchanged.
   *
   *  The capacity check is done on byte counts before the position is
   *  advanced, so no pointer beyond the end of the buffer is ever formed
   *  and an oversized request cannot defeat the check by wrapping the
   *  pointer around.
   */
  template< typename IntType >
  KOKKOS_INLINE_FUNCTION
  void* get_shmem (const IntType& size) const {
    const IntType aligned = align (size);

    // A request fails when it is positive and larger than what is left.
    // Non-positive requests never fail, as before.
    const bool too_large =
      ( IntType(0) < aligned ) &&
      ( ( m_end <= m_iter ) || ( size_t (m_end - m_iter) < size_t (aligned) ) );

    if (too_large) {
#ifdef KOKKOS_HAVE_DEBUG
      // mfh 23 Jun 2015: printf call consumes 25 registers
      // in a CUDA build, so only print in debug mode.  The
      // function still returns NULL if not enough memory.
      printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
              "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
              long(m_end-m_iter));
#endif // KOKKOS_HAVE_DEBUG
      return 0;
    }

    void* const chunk = m_iter ;
    m_iter += aligned ;
    return chunk;
  }

  /** \brief Manage the buffer of \c size bytes starting at \c ptr.
   *
   *  m_end is computed from m_iter, which is initialized first because it
   *  is declared first.
   */
  template< typename IntType >
  KOKKOS_INLINE_FUNCTION
  ScratchMemorySpace( void * ptr , const IntType & size )
    : m_iter( (char *) ptr )
    , m_end(  m_iter + size )
    {}
};
} // namespace Kokkos
#endif /* #ifndef KOKKOS_SCRATCHSPACE_HPP */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -1,892 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Serial.hpp
/// \brief Declaration and definition of Kokkos::Serial device.
#ifndef KOKKOS_SERIAL_HPP
#define KOKKOS_SERIAL_HPP
#include <cstddef>
#include <iosfwd>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
namespace Kokkos {
/// \class Serial
/// \brief Kokkos device for non-parallel execution
///
/// A "device" represents a parallel execution model. It tells Kokkos
/// how to parallelize the execution of kernels in a parallel_for or
/// parallel_reduce. For example, the Threads device uses Pthreads or
/// C++11 threads on a CPU, the OpenMP device uses the OpenMP language
/// extensions, and the Cuda device uses NVIDIA's CUDA programming
/// model. The Serial device executes "parallel" kernels
/// sequentially. This is useful if you really do not want to use
/// threads, or if you want to explore different combinations of MPI
/// and shared-memory parallel programming models.
class Serial {
public:
//! \name Type declarations that all Kokkos devices must provide.
//@{
//! Tag this class as an execution space:
typedef Serial execution_space ;
//! The size_type typedef best suited for this device.
typedef HostSpace::size_type size_type ;
//! This device's preferred memory space.
typedef HostSpace memory_space ;
//! This execution space's preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! This device's preferred array layout.
typedef LayoutRight array_layout ;
/// \brief Scratch memory space
typedef ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
//@}
/// \brief True if and only if this method is being called in a
/// thread-parallel function.
///
/// For the Serial device, this method <i>always</i> returns false
/// (as the int value 0, since the return type is int),
/// because parallel_for or parallel_reduce with the Serial device
/// always execute sequentially.
inline static int in_parallel() { return false ; }
/** \brief Set the device in a "sleep" state.
*
* This function sets the device in a "sleep" state in which it is
* not ready for work. This may consume fewer resources than if the
* device were in an "awake" state, but it may also take time to
* bring the device from a sleep state to be ready for work.
*
* \return True if the device is in the "sleep" state, else false if
* the device is actively working and could not enter the "sleep"
* state.
*/
static bool sleep();
/// \brief Wake the device from the 'sleep' state so it is ready for work.
///
/// \return True if the device is in the "ready" state, else "false"
/// if the device is actively working (which also means that it's
/// awake).
static bool wake();
/// \brief Wait until all dispatched functors complete.
///
/// The parallel_for or parallel_reduce dispatch of a functor may
/// return asynchronously, before the functor completes. This
/// method does not return until all dispatched functors on this
/// device have completed.
///
/// For the Serial device the body is empty.
static void fence() {}
/// \brief Initialize the device.
///
/// All four arguments are accepted for interface compatibility and
/// ignored; the only work done is initializing the host-space lock array.
static void initialize( unsigned threads_count = 1 ,
unsigned use_numa_count = 0 ,
unsigned use_cores_per_numa = 0 ,
bool allow_asynchronous_threadpool = false) {
// Mark the ignored arguments as used.
(void) threads_count;
(void) use_numa_count;
(void) use_cores_per_numa;
(void) allow_asynchronous_threadpool;
// Init the array of locks used for arbitrarily sized atomics
Impl::init_lock_array_host_space();
}
//! Always returns 1, whether or not initialize() has been called.
static int is_initialized() { return 1 ; }
//! Free any resources being consumed by the device. The body is empty.
static void finalize() {}
//! Print configuration information to the given output stream.
//! The body is empty, so nothing is written.
static void print_configuration( std::ostream & , const bool detail = false ) {}
//--------------------------------------------------------------------------
//! Always 1; the int argument is ignored.
inline static int thread_pool_size( int = 0 ) { return 1 ; }
//! Always 0.
KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; }
//--------------------------------------------------------------------------
//! Same value as thread_pool_rank(), i.e. 0.
KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); }
//! Same value as thread_pool_size(0), i.e. 1.
inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
//--------------------------------------------------------------------------
//! Declared here, defined elsewhere. The Serial ParallelReduce
//! specialization calls it with ( value_size , 0 ) and uses the returned
//! pointer as reduction storage.
//! NOTE(review): presumably (re)sizes an internal scratch buffer to hold
//! reduce_size plus shared_size bytes -- confirm against the implementation.
static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size );
//--------------------------------------------------------------------------
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/// \brief Full specialization for the pair
///        ( Kokkos::Serial::memory_space , Kokkos::Serial::scratch_memory_space ).
///
/// \c value is true and both verify() overloads have empty bodies, so no
/// check is performed for this pair of spaces.
/// NOTE(review): presumably this states that code running with access to the
/// first space may also access the second -- confirm against the primary
/// template.
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::Serial::memory_space
, Kokkos::Serial::scratch_memory_space
>
{
enum { value = true };
// Both overloads are intentionally empty.
inline static void verify( void ) { }
inline static void verify( const void * ) { }
};
/// Implementation details of the Serial device. Only declarations appear
/// here; the definitions live elsewhere.
namespace SerialImpl {
/// State holder accessed through singleton().
/// NOTE(review): the member names suggest this tracks the Serial scratch
/// buffer and the end offsets of its reduce and shared regions -- confirm
/// against the implementation.
struct Sentinel {
void * m_scratch ;
unsigned m_reduce_end ;
unsigned m_shared_end ;
Sentinel();
~Sentinel();
/// Returns a reference to a Sentinel instance (defined elsewhere).
static Sentinel & singleton();
};
/// Declared inline here and defined elsewhere.
/// NOTE(review): presumably rounds n up to an alignment boundary -- confirm.
inline
unsigned align( unsigned n );
}
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/** \brief Team member handle for the Serial execution space.
 *
 *  Every team has exactly one member (team_rank() == 0, team_size() == 1),
 *  so all intra-team collectives reduce to trivial operations.
 */
class SerialTeamMember {
private:

  typedef Kokkos::ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;

  const scratch_memory_space  m_space ;
  const int                   m_league_rank ;
  const int                   m_league_size ;

  // Assignment is declared private and not defined in this class.
  SerialTeamMember & operator = ( const SerialTeamMember & );

public:

  //! Scratch memory handle of this team.
  KOKKOS_INLINE_FUNCTION
  const scratch_memory_space & team_shmem() const
    { return m_space ; }

  //! Rank of this team within the league.
  KOKKOS_INLINE_FUNCTION
  int league_rank() const
    { return m_league_rank ; }

  //! Number of teams in the league.
  KOKKOS_INLINE_FUNCTION
  int league_size() const
    { return m_league_size ; }

  //! Always 0: the team's only member.
  KOKKOS_INLINE_FUNCTION
  int team_rank() const
    { return 0 ; }

  //! Always 1.
  KOKKOS_INLINE_FUNCTION
  int team_size() const
    { return 1 ; }

  //! No-op.
  KOKKOS_INLINE_FUNCTION
  void team_barrier() const
    {}

  //! No-op; both arguments are ignored.
  template< class ValueType >
  KOKKOS_INLINE_FUNCTION
  void team_broadcast( const ValueType & , const int & ) const
    {}

  //! Returns \c value unchanged; the join operation is never applied.
  template< class ValueType , class JoinOp >
  KOKKOS_INLINE_FUNCTION
  ValueType team_reduce( const ValueType & value , const JoinOp & ) const
    { return value ; }

  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
   *          with intra-team non-deterministic ordering accumulation.
   *
   *  The global inter-team accumulation value will, at the end of the
   *  league's parallel execution, be the scan's total.
   *  Parallel execution ordering of the league's teams is non-deterministic.
   *  As such the base value for each team's scan operation is similarly
   *  non-deterministic.
   *
   *  Here: returns the value of \c *global_accum before \c value is added
   *  to it, or Type(0) when \c global_accum is null.
   */
  template< typename Type >
  KOKKOS_INLINE_FUNCTION
  Type team_scan( const Type & value , Type * const global_accum ) const
    {
      // No accumulator: the exclusive scan result is zero.
      if ( ! global_accum ) { return Type(0) ; }

      const Type before_update = *global_accum ;
      *global_accum += value ;
      return before_update ;
    }

  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
   *
   *  The highest rank thread can compute the reduction total as
   *    reduction_total = dev.team_scan( value ) + value ;
   *
   *  Here: always Type(0).
   */
  template< typename Type >
  KOKKOS_INLINE_FUNCTION
  Type team_scan( const Type & ) const
    { return Type(0) ; }

  //----------------------------------------
  // Execution space specific:

  //! Declared here, defined elsewhere.
  SerialTeamMember( int arg_league_rank
                  , int arg_league_size
                  , int arg_shared_size
                  );
};
} // namespace Impl
/*
 *  Template argument patterns noted for this specialization:
 *
 *    < Kokkos::Serial , WorkArgTag >
 *    < WorkArgTag , Impl::enable_if< Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type >
 *
 *  The team size is fixed at 1; only the league size is stored.
 */
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::Serial >
{
private:

  const int m_league_size ;

public:

  //! Tag this class as a kokkos execution policy
  typedef TeamPolicy              execution_policy ;

  //! Execution space of this execution policy:
  typedef Kokkos::Serial          execution_space ;

  //! Handle type describing a team member.
  typedef Impl::SerialTeamMember  member_type ;

  //! Arg0 unless Arg0 is Kokkos::Serial, in which case Arg1.
  typedef typename
    Impl::if_c< ! Impl::is_same< Kokkos::Serial , Arg0 >::value , Arg0 , Arg1 >::type
      work_tag ;

  //----------------------------------------
  // Team size queries: always 1, the functor is not inspected.

  template< class FunctorType >
  static int team_size_max( const FunctorType & )
    { return 1 ; }

  template< class FunctorType >
  static int team_size_recommended( const FunctorType & )
    { return 1 ; }

  template< class FunctorType >
  static int team_size_recommended( const FunctorType & , const int & )
    { return 1 ; }

  //----------------------------------------

  int team_size() const   { return 1 ; }
  int league_size() const { return m_league_size ; }

  /** \brief  Specify league size, request team size.
   *
   *  The execution space instance, the requested team size and the
   *  requested vector length are all ignored.
   */
  TeamPolicy( execution_space &
            , int league_size_request
            , int /* team_size_request */
            , int /* vector_length_request */ = 1 )
    : m_league_size( league_size_request )
    {}

  /** \brief  Specify league size, request team size.
   *
   *  The requested team size and vector length are ignored.
   */
  TeamPolicy( int league_size_request
            , int /* team_size_request */
            , int /* vector_length_request */ = 1 )
    : m_league_size( league_size_request )
    {}
};
} /* namespace Kokkos */
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/// \brief parallel_for over a RangePolicy on the Serial execution space.
///
/// The whole range is executed sequentially inside the constructor: the
/// functor is called once for every index from policy.begin() up to, but
/// not including, policy.end(). Exactly one of the two constructors is
/// enabled, depending on whether the policy's work_tag is void.
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
{
private:

  typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;

public:

  /// Untagged dispatch (work_tag is void): calls functor( index ).
  template< class PType >
  inline
  ParallelFor( typename Impl::enable_if<
                 ( Impl::is_same< PType , Policy >::value &&
                   Impl::is_same< typename PType::work_tag , void >::value
                 ), const FunctorType & >::type functor
             , const PType & policy )
    {
      typedef typename PType::member_type index_type ;

      const index_type work_end = policy.end();

      for ( index_type iwork = policy.begin() ; iwork < work_end ; ++iwork ) {
        functor( iwork );
      }
    }

  /// Tagged dispatch (work_tag is not void): calls
  /// functor( work_tag() , index ) with a freshly constructed tag.
  template< class PType >
  inline
  ParallelFor( typename Impl::enable_if<
                 ( Impl::is_same< PType , Policy >::value &&
                   ! Impl::is_same< typename PType::work_tag , void >::value
                 ), const FunctorType & >::type functor
             , const PType & policy )
    {
      typedef typename PType::member_type index_type ;

      const index_type work_end = policy.end();

      for ( index_type iwork = policy.begin() ; iwork < work_end ; ++iwork ) {
        functor( typename PType::work_tag() , iwork );
      }
    }
};
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
{
public:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
typedef typename Policy::work_tag WorkTag ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
// Work tag is void
template< class ViewType , class PType >
ParallelReduce( typename Impl::enable_if<
( Impl::is_view< ViewType >::value &&
Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
Impl::is_same< PType , Policy >::value &&
Impl::is_same< typename PType::work_tag , void >::value
), const FunctorType & >::type functor
, const PType & policy
, const ViewType & result
)
{
pointer_type result_ptr = result.ptr_on_device();
if ( ! result_ptr ) {
result_ptr = (pointer_type)
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
}
reference_type update = ValueInit::init( functor , result_ptr );
const typename PType::member_type e = policy.end();
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
functor( i , update );
}
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
}
// Work tag is non-void
template< class ViewType , class PType >
ParallelReduce( typename Impl::enable_if<
( Impl::is_view< ViewType >::value &&
Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
Impl::is_same< PType , Policy >::value &&
! Impl::is_same< typename PType::work_tag , void >::value
), const FunctorType & >::type functor
, const PType & policy
, const ViewType & result
)
{
pointer_type result_ptr = result.ptr_on_device();
if ( ! result_ptr ) {
result_ptr = (pointer_type)
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
}
typename ValueTraits::reference_type update = ValueInit::init( functor , result_ptr );
const typename PType::member_type e = policy.end();
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
functor( typename PType::work_tag() , i , update );
}
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
}
};
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
public:
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
// work tag is void
template< class PType >
inline
ParallelScan( typename Impl::enable_if<
( Impl::is_same< PType , Policy >::value &&
Impl::is_same< typename PType::work_tag , void >::value
), const FunctorType & >::type functor
, const PType & policy )
{
pointer_type result_ptr = (pointer_type)
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
reference_type update = ValueInit::init( functor , result_ptr );
const typename PType::member_type e = policy.end();
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
functor( i , update , true );
}
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
}
// work tag is non-void
template< class PType >
inline
ParallelScan( typename Impl::enable_if<
( Impl::is_same< PType , Policy >::value &&
! Impl::is_same< typename PType::work_tag , void >::value
), const FunctorType & >::type functor
, const PType & policy )
{
pointer_type result_ptr = (pointer_type)
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
reference_type update = ValueInit::init( functor , result_ptr );
const typename PType::member_type e = policy.end();
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
functor( typename PType::work_tag() , i , update , true );
}
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
}
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 >
class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > Policy ;
template< class TagType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
const FunctorType & >::type functor
, const typename Policy::member_type & member )
{ functor( member ); }
template< class TagType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
const FunctorType & >::type functor
, const typename Policy::member_type & member )
{ functor( TagType() , member ); }
public:
ParallelFor( const FunctorType & functor
, const Policy & policy )
{
const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
Kokkos::Serial::scratch_memory_resize( 0 , shared_size );
for ( int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
ParallelFor::template driver< typename Policy::work_tag >
( functor , typename Policy::member_type(ileague,policy.league_size(),shared_size) );
// functor( typename Policy::member_type(ileague,policy.league_size(),shared_size) );
}
}
};
template< class FunctorType , class Arg0 , class Arg1 >
class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > Policy ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
public:
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
private:
template< class TagType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
const FunctorType & >::type functor
, const typename Policy::member_type & member
, reference_type update )
{ functor( member , update ); }
template< class TagType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
const FunctorType & >::type functor
, const typename Policy::member_type & member
, reference_type update )
{ functor( TagType() , member , update ); }
public:
template< class ViewType >
ParallelReduce( const FunctorType & functor
, const Policy & policy
, const ViewType & result
)
{
const int reduce_size = ValueTraits::value_size( functor );
const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
void * const scratch_reduce = Kokkos::Serial::scratch_memory_resize( reduce_size , shared_size );
const pointer_type result_ptr =
result.ptr_on_device() ? result.ptr_on_device()
: (pointer_type) scratch_reduce ;
reference_type update = ValueInit::init( functor , result_ptr );
for ( int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
ParallelReduce::template driver< typename Policy::work_tag >
( functor , typename Policy::member_type(ileague,policy.league_size(),shared_size) , update );
}
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
}
};
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
/** \brief  Loop bounds for a team-level range over a SerialTeamMember.
 *
 *  Holds the half-open index range [ begin , end ), a unit increment and a
 *  reference to the team member the range was created for.
 */
template<typename iType>
struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> {

  typedef iType index_type;

  const iType begin ;
  const iType end ;
  enum {increment = 1};
  const SerialTeamMember& thread;

  /** \brief  Range [ 0 , count ). */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const SerialTeamMember & team_member
                                 , const iType            & count )
    : begin( 0 )
    , end( count )
    , thread( team_member )
    {}

  /** \brief  Range [ first , last ). */
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const SerialTeamMember & team_member
                                 , const iType            & first
                                 , const iType            & last )
    : begin( first )
    , end( last )
    , thread( team_member )
    {}
};
/** \brief  Loop bounds for a vector-level range over a SerialTeamMember.
 *
 *  The range always starts at zero and advances by one; only the end of the
 *  range is stored.  The team member passed to the constructor is not used.
 */
template<typename iType>
struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> {

  typedef iType index_type;

  enum {start = 0};
  const iType end;
  enum {increment = 1};

  /** \brief  Range [ 0 , count ). */
  KOKKOS_INLINE_FUNCTION
  ThreadVectorRangeBoundariesStruct( const SerialTeamMember & /* unused */
                                   , const iType            & count )
    : end( count )
    {}
};
} // namespace Impl
/** \brief  Create the team-level loop bounds [ 0 , count ) for a Serial team member. */
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count )
{
  typedef Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember> boundaries_type ;

  return boundaries_type( thread , count );
}
/** \brief  Create the team-level loop bounds [ begin , end ) for a Serial team member. */
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & begin , const iType & end )
{
  typedef Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember> boundaries_type ;

  return boundaries_type( thread , begin , end );
}
/** \brief  Create the vector-level loop bounds [ 0 , count ) for a Serial team member. */
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >
ThreadVectorRange(const Impl::SerialTeamMember& thread, const iType& count)
{
  typedef Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember > boundaries_type ;

  return boundaries_type( thread , count );
}
/** \brief  Wrap a Serial team member in the ThreadSingleStruct accepted by Kokkos::single. */
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam(const Impl::SerialTeamMember& thread)
{
  typedef Impl::ThreadSingleStruct<Impl::SerialTeamMember> single_type ;

  return single_type( thread );
}
/** \brief  Wrap a Serial team member in the VectorSingleStruct accepted by Kokkos::single. */
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread(const Impl::SerialTeamMember& thread)
{
  typedef Impl::VectorSingleStruct<Impl::SerialTeamMember> single_type ;

  return single_type( thread );
}
} // namespace Kokkos
namespace Kokkos {
/** \brief  Inter-thread parallel_for.  Executes lambda(iType i) for each i in
 *          [ loop_boundaries.begin , loop_boundaries.end ).
 *
 * In the Serial back-end the indices are visited one after another, in
 * increasing order, by the calling thread.
 * This functionality is intended for use with C++11 lambdas. */
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, const Lambda& lambda)
{
  for ( iType iwork = loop_boundaries.begin ;
        iwork < loop_boundaries.end ;
        iwork += loop_boundaries.increment ) {
    lambda( iwork );
  }
}
/** \brief  Inter-thread parallel_reduce (summation).  Executes
 *          lambda(iType i, ValueType & val) for each i in
 *          [ loop_boundaries.begin , loop_boundaries.end ).
 *
 * 'result' is first reset to ValueType().  Every call of the lambda receives
 * its own value-initialized temporary, which is then added to 'result' with
 * operator+=.  Finally 'result' is passed through the team member's
 * team_reduce with Impl::JoinAdd and the outcome is stored back in 'result'.
 * This functionality is intended for use with C++11 lambdas. */
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
                     const Lambda & lambda, ValueType& result)
{
  result = ValueType();

  for ( iType iwork = loop_boundaries.begin ;
        iwork < loop_boundaries.end ;
        iwork += loop_boundaries.increment ) {
    ValueType contribution = ValueType();
    lambda( iwork , contribution );
    result += contribution ;
  }

  result = loop_boundaries.thread.team_reduce( result , Impl::JoinAdd<ValueType>() );
}
#ifdef KOKKOS_HAVE_CXX11
/** \brief  Inter-thread parallel_reduce with a user-supplied join.  Executes
 *          lambda(iType i, ValueType & val) for each i in
 *          [ loop_boundaries.begin , loop_boundaries.end ).
 *
 * The running value starts as a copy of init_result.  Every call of the
 * lambda receives its own temporary initialized with ValueType() (not with
 * init_result), which is then combined into the running value through
 * join(ValueType & val, const ValueType & update).  The running value is
 * finally passed through the team member's team_reduce with an
 * Impl::JoinLambdaAdapter around 'join', and the outcome is written to
 * init_result.  The input value of init_result should therefore be the
 * neutral element with respect to the join operation (e.g. '0 for +-' or
 * '1 for *').  This overload is only available when KOKKOS_HAVE_CXX11 is
 * defined. */
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
                     const Lambda & lambda, const JoinType& join, ValueType& init_result)
{
  ValueType accumulated = init_result;

  for ( iType iwork = loop_boundaries.begin ;
        iwork < loop_boundaries.end ;
        iwork += loop_boundaries.increment ) {
    ValueType contribution = ValueType();
    lambda( iwork , contribution );
    join( accumulated , contribution );
  }

  init_result =
    loop_boundaries.thread.team_reduce( accumulated , Impl::JoinLambdaAdapter<ValueType,JoinType>(join) );
}
#endif // KOKKOS_HAVE_CXX11
} //namespace Kokkos
namespace Kokkos {
/** \brief  Intra-thread vector parallel_for.  Executes lambda(iType i) for
 *          each i in [ 0 , loop_boundaries.end ).
 *
 * In the Serial back-end this is a plain loop executed by the calling
 * thread; when KOKKOS_HAVE_PRAGMA_IVDEP is defined the loop is annotated
 * with '#pragma ivdep'.
 * This functionality is intended for use with C++11 lambdas. */
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
    loop_boundaries, const Lambda& lambda)
{
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType iwork = loop_boundaries.start ;
        iwork < loop_boundaries.end ;
        iwork += loop_boundaries.increment ) {
    lambda( iwork );
  }
}
/** \brief  Intra-thread vector parallel_reduce (summation).  Executes
 *          lambda(iType i, ValueType & val) for each i in
 *          [ 0 , loop_boundaries.end ).
 *
 * 'result' is first reset to ValueType().  Every call of the lambda receives
 * its own value-initialized temporary, which is then added to 'result' with
 * operator+=.  When KOKKOS_HAVE_PRAGMA_IVDEP is defined the loop is
 * annotated with '#pragma ivdep'.
 * This functionality is intended for use with C++11 lambdas. */
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
      loop_boundaries, const Lambda & lambda, ValueType& result)
{
  result = ValueType();

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType iwork = loop_boundaries.start ;
        iwork < loop_boundaries.end ;
        iwork += loop_boundaries.increment ) {
    ValueType contribution = ValueType();
    lambda( iwork , contribution );
    result += contribution ;
  }
}
/** \brief  Intra-thread vector parallel_reduce with a user-supplied join.
 *          Executes lambda(iType i, ValueType & val) for each i in
 *          [ 0 , loop_boundaries.end ).
 *
 * The running value starts as a copy of init_result.  Every call of the
 * lambda receives its own temporary initialized with ValueType() (not with
 * init_result), which is then combined into the running value through
 * join(ValueType & val, const ValueType & update).  The running value is
 * written to init_result at the end.  The input value of init_result should
 * therefore be the neutral element with respect to the join operation
 * (e.g. '0 for +-' or '1 for *').  When KOKKOS_HAVE_PRAGMA_IVDEP is defined
 * the loop is annotated with '#pragma ivdep'.
 * This functionality is intended for use with C++11 lambdas. */
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
      loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result)
{
  ValueType accumulated = init_result;

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType iwork = loop_boundaries.start ;
        iwork < loop_boundaries.end ;
        iwork += loop_boundaries.increment ) {
    ValueType contribution = ValueType();
    lambda( iwork , contribution );
    join( accumulated , contribution );
  }

  init_result = accumulated ;
}
/** \brief  Intra-thread vector parallel exclusive prefix sum.  Executes
 *          lambda(iType i, ValueType & val, bool final) for each i in
 *          [ 0 , loop_boundaries.end ).
 *
 * The value type is taken from FunctorValueTraits< FunctorType , void >.
 * Depending on the target execution space the operator might be called
 * twice: once with final=false and once with final=true.  When final==true
 * val contains the prefix sum value.  The contribution of this "i" needs to
 * be added to val no matter whether final==true or not.  In this Serial
 * implementation the operator is called exactly once per index, always with
 * final==true.  The running value is a local variable that starts at
 * value_type() and is discarded when the function returns; no total is
 * handed back to the caller.  When KOKKOS_HAVE_PRAGMA_IVDEP is defined the
 * loop is annotated with '#pragma ivdep'.
 * This functionality is intended for use with C++11 lambdas. */
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
      loop_boundaries, const FunctorType & lambda)
{
  typedef typename Kokkos::Impl::FunctorValueTraits< FunctorType , void >::value_type value_type ;

  value_type running = value_type();

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType iwork = loop_boundaries.start ;
        iwork < loop_boundaries.end ;
        iwork += loop_boundaries.increment ) {
    lambda( iwork , running , true );
  }
}
} // namespace Kokkos
namespace Kokkos {
/** \brief  Vector-level single: in the Serial back-end the callable is simply
 *          invoked once; the VectorSingleStruct argument is not used. */
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single( const Impl::VectorSingleStruct<Impl::SerialTeamMember> &
           , const FunctorType & lambda )
{
  lambda();
}
/** \brief  Team-level single: in the Serial back-end the callable is simply
 *          invoked once; the ThreadSingleStruct argument is not used. */
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single( const Impl::ThreadSingleStruct<Impl::SerialTeamMember> &
           , const FunctorType & lambda )
{
  lambda();
}
/** \brief  Vector-level single with a value: the callable is invoked once as
 *          lambda(val); the VectorSingleStruct argument is not used. */
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single( const Impl::VectorSingleStruct<Impl::SerialTeamMember> &
           , const FunctorType & lambda
           , ValueType & val )
{
  lambda( val );
}
/** \brief  Team-level single with a value: the callable is invoked once as
 *          lambda(val); the ThreadSingleStruct argument is not used. */
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single( const Impl::ThreadSingleStruct<Impl::SerialTeamMember> &
           , const FunctorType & lambda
           , ValueType & val )
{
  lambda( val );
}
}
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif /* #define KOKKOS_SERIAL_HPP */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -1,376 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Experimental unified task-data parallel manycore LDRD
#ifndef KOKKOS_TASKPOLICY_HPP
#define KOKKOS_TASKPOLICY_HPP
#include <Kokkos_Core_fwd.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_StaticAssert.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Empty tag type.  It is not referenced anywhere else in this header.
// NOTE(review): the name suggests it reports use of a Future whose
// value_type is void - confirm where it is consumed.
struct FutureValueTypeIsVoidError {};
// Forward declarations only: these templates are declared here so that the
// Future and TaskPolicy declarations below can name them; their definitions
// are not part of this header.
template < class ExecSpace , class ResultType , class FunctorType >
class TaskMember ;
template< class ExecPolicy , class ResultType , class FunctorType >
class TaskForEach ;
template< class ExecPolicy , class ResultType , class FunctorType >
class TaskReduce ;
template< class ExecPolicy , class ResultType , class FunctorType >
struct TaskScan ;
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
/** \brief  States of a task.
 *
 *  TASK_STATE_NULL is zero; each of the other states has its own distinct
 *  single-bit value.
 */
enum TaskState {
  TASK_STATE_NULL         = 0 ,  ///< Does not exist
  TASK_STATE_CONSTRUCTING = 1 ,  ///< Is under construction
  TASK_STATE_WAITING      = 2 ,  ///< Is waiting for execution
  TASK_STATE_EXECUTING    = 4 ,  ///< Is executing
  TASK_STATE_COMPLETE     = 8    ///< Execution is complete
};
/** \brief Handle to a task, optionally typed by the task's result value.
*
* Accepted template argument forms:
*
* Future< space > // value_type == void
* Future< value > // space == Default
* Future< value , space >
*
* Any other combination is rejected through the StaticAssert in 'OptOK'.
*/
template< class Arg1 = void , class Arg2 = void >
class Future {
private:
template< class , class , class > friend class Impl::TaskMember ;
template< class > friend class TaskPolicy ;
template< class , class > friend class Future ;
// Argument #2, if not void, must be the space.
enum { Arg1_is_space = Kokkos::Impl::is_execution_space< Arg1 >::value };
enum { Arg2_is_space = Kokkos::Impl::is_execution_space< Arg2 >::value };
enum { Arg2_is_void = Kokkos::Impl::is_same< Arg2 , void >::value };
struct ErrorNoExecutionSpace {};
// Opt1: Future< space >          (Arg1 is an execution space, Arg2 is void)
// Opt2: Future< value >          (Arg1 is not an execution space, Arg2 is void)
// Opt3: Future< value , space >  (Arg1 is not an execution space, Arg2 is one)
// OptOK instantiates StaticAssert on ( Opt1 || Opt2 || Opt3 );
// presumably this fails to compile when none of the forms matches - confirm
// against Kokkos::Impl::StaticAssert.
enum { Opt1 = Arg1_is_space && Arg2_is_void
, Opt2 = ! Arg1_is_space && Arg2_is_void
, Opt3 = ! Arg1_is_space && Arg2_is_space
, OptOK = Kokkos::Impl::StaticAssert< Opt1 || Opt2 || Opt3 , ErrorNoExecutionSpace >::value
};
// Value type: Arg1 for the Opt2 and Opt3 forms, otherwise void.
typedef typename
Kokkos::Impl::if_c< Opt2 || Opt3 , Arg1 , void >::type
ValueType ;
// Execution space: Arg1 for Opt1, Kokkos::DefaultExecutionSpace for Opt2,
// Arg2 for Opt3, and void when no form matches.
typedef typename
Kokkos::Impl::if_c< Opt1 , Arg1 , typename
Kokkos::Impl::if_c< Opt2 , Kokkos::DefaultExecutionSpace , typename
Kokkos::Impl::if_c< Opt3 , Arg2 , void
>::type >::type >::type
ExecutionSpace ;
// TaskRoot is the value-less task type held by every future of this
// execution space; TaskValue is the task type used to read a result of
// type ValueType (see get()).
typedef Impl::TaskMember< ExecutionSpace , void , void > TaskRoot ;
typedef Impl::TaskMember< ExecutionSpace , ValueType , void > TaskValue ;
// The task this future refers to; 0 when the future is empty.
// Every update goes through TaskRoot::assign.
// NOTE(review): assign presumably maintains a reference count on the task -
// confirm in the TaskMember specialization.
TaskRoot * m_task ;
public:
typedef ValueType value_type;
typedef ExecutionSpace execution_space ;
//----------------------------------------
/** \brief State of the referenced task, or TASK_STATE_NULL for an empty future. */
KOKKOS_INLINE_FUNCTION
TaskState get_task_state() const
{ return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; }
//----------------------------------------
/** \brief Refer to an existing task.
*
* The pointer is first passed through TaskRoot::verify_type< value_type >
* and the outcome is stored with TaskRoot::assign.
*/
explicit
Future( TaskRoot * task )
: m_task(0)
{ TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( task ) ); }
//----------------------------------------
/** \brief Resets m_task to null through TaskRoot::assign. */
KOKKOS_INLINE_FUNCTION
~Future() { TaskRoot::assign( & m_task , 0 ); }
//----------------------------------------
/** \brief Construct an empty future (no task). */
KOKKOS_INLINE_FUNCTION
Future() : m_task(0) {}
/** \brief Refer to the same task as 'rhs'. */
KOKKOS_INLINE_FUNCTION
Future( const Future & rhs )
: m_task(0)
{ TaskRoot::assign( & m_task , rhs.m_task ); }
/** \brief Refer to the same task as 'rhs'; the previous task pointer is replaced via TaskRoot::assign. */
KOKKOS_INLINE_FUNCTION
Future & operator = ( const Future & rhs )
{ TaskRoot::assign( & m_task , rhs.m_task ); return *this ; }
//----------------------------------------
/** \brief Converting copy from a future with other template arguments.
*
* The source task pointer is passed through
* TaskRoot::verify_type< value_type > before it is stored.
*/
template< class A1 , class A2 >
KOKKOS_INLINE_FUNCTION
Future( const Future<A1,A2> & rhs )
: m_task(0)
{ TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); }
/** \brief Converting assignment; same verification as the converting constructor. */
template< class A1 , class A2 >
KOKKOS_INLINE_FUNCTION
Future & operator = ( const Future<A1,A2> & rhs )
{ TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); return *this ; }
//----------------------------------------
typedef typename TaskValue::get_result_type get_result_type ;
/** \brief Result of the referenced task, read through TaskValue::get().
*
* m_task is cast to TaskValue* and dereferenced without a null check here.
* NOTE(review): calling get() on an empty future therefore looks invalid -
* confirm the intended precondition.
*/
KOKKOS_INLINE_FUNCTION
get_result_type get() const
{ return static_cast<TaskValue*>( m_task )->get(); }
};
namespace Impl {
/** \brief  Type trait: true only for instantiations of Kokkos::Experimental::Future. */
template< class T >
struct is_future
  : Kokkos::Impl::bool_< false >
{};

/** \brief  Specialization matching every Future< Arg0 , Arg1 >. */
template< class Arg0 , class Arg1 >
struct is_future< Kokkos::Experimental::Future<Arg0,Arg1> >
  : Kokkos::Impl::bool_< true >
{};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
/** \brief If the argument is an execution space then a serial task in that space
*
* This primary template only declares the task policy interface; none of
* its members is defined in this header.
* NOTE(review): the definitions are presumably supplied per execution
* space elsewhere - confirm.
*/
template< class Arg0 = Kokkos::DefaultExecutionSpace >
class TaskPolicy {
public:
typedef typename Arg0::execution_space execution_space ;
//----------------------------------------
/** \brief Create a serial task with storage for dependences.
*
* Postcondition: Task is in the 'constructing' state.
*
* Note: 'dependence_capacity' has no default argument in this declaration
* (the default is only hinted at in a comment).
*/
template< class FunctorType >
Future< typename FunctorType::value_type , execution_space >
create( const FunctorType & functor
, const unsigned dependence_capacity /* = default */ ) const ;
/** \brief Create a foreach task with storage for dependences. */
template< class ExecPolicy , class FunctorType >
Future< typename FunctorType::value_type , execution_space >
create_foreach( const ExecPolicy & policy
, const FunctorType & functor
, const unsigned dependence_capacity /* = default */ ) const ;
/** \brief Create a reduce task with storage for dependences. */
template< class ExecPolicy , class FunctorType >
Future< typename FunctorType::value_type , execution_space >
create_reduce( const ExecPolicy & policy
, const FunctorType & functor
, const unsigned dependence_capacity /* = default */ ) const ;
/** \brief Create a scan task with storage for dependences. */
template< class ExecPolicy , class FunctorType >
Future< typename FunctorType::value_type , execution_space >
create_scan( const ExecPolicy & policy
, const FunctorType & functor
, const unsigned dependence_capacity /* = default */ ) const ;
/** \brief Set dependence that 'after' cannot start execution
* until 'before' has completed.
*
* Precondition: The 'after' task must be in the 'Constructing' state.
*/
template< class TA , class TB >
void set_dependence( const Future<TA,execution_space> & after
, const Future<TB,execution_space> & before ) const ;
/** \brief Spawn a task in the 'Constructing' state
*
* Precondition: Task is in the 'constructing' state.
* Postcondition: Task is waiting, executing, or complete.
*
* Returns a reference to a Future of the same type as the argument.
*/
template< class T >
const Future<T,execution_space> &
spawn( const Future<T,execution_space> & ) const ;
//----------------------------------------
/** \brief Query dependence of an executing task */
template< class FunctorType >
Future< execution_space >
get_dependence( FunctorType * , const int ) const ;
//----------------------------------------
/** \brief Clear current dependences of an executing task
* in preparation for setting new dependences and
* respawning.
*
* Precondition: The functor must be a task in the executing state.
*/
template< class FunctorType >
void clear_dependence( FunctorType * ) const ;
/** \brief Set dependence that 'after' cannot start execution
* until 'before' has completed.
*
* The 'after' functor must be in the executing state
*/
template< class FunctorType , class TB >
void set_dependence( FunctorType * after
, const Future<TB,execution_space> & before ) const ;
/** \brief Respawn (reschedule) an executing task to be called again
* after all dependences have completed.
*/
template< class FunctorType >
void respawn( FunctorType * ) const ;
};
//----------------------------------------------------------------------------
/** \brief  Create and spawn a single-thread task.
 *
 *  Forwards to policy.create( functor ) followed by policy.spawn(), and
 *  returns the resulting future by value.
 *
 *  NOTE(review): create() is called with a single argument, whereas the
 *  TaskPolicy declaration in this header takes a second
 *  'dependence_capacity' parameter without a default - presumably the
 *  execution-space specific TaskPolicy supplies one; confirm.
 */
template< class ExecSpace , class FunctorType >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn( TaskPolicy<ExecSpace> & policy , const FunctorType & functor )
{
  return policy.spawn( policy.create( functor ) );
}
/** \brief  Create and spawn a single-thread task with two dependences.
 *
 *  The task is created with a dependence capacity of two, both 'before'
 *  futures are added as dependences, and the task is then spawned.  Both
 *  'before' futures must have the same Future< Arg0 , Arg1 > type.
 *
 *  NOTE(review): this calls policy.add_dependence(), which the TaskPolicy
 *  declaration in this header does not declare (it declares
 *  set_dependence) - presumably provided by the execution-space specific
 *  TaskPolicy; confirm.
 */
template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn( TaskPolicy<ExecSpace>   & policy
     , const FunctorType       & functor
     , const Future<Arg0,Arg1> & before_0
     , const Future<Arg0,Arg1> & before_1 )
{
  typedef Future< typename FunctorType::value_type , ExecSpace > future_type ;

  future_type task ;

  task = policy.create( functor , 2 ); // room for the two dependences below

  policy.add_dependence( task , before_0 );
  policy.add_dependence( task , before_1 );

  policy.spawn( task );

  return task ;
}
//----------------------------------------------------------------------------
/** \brief  Create and spawn a parallel_for task.
 *
 *  Forwards to task_policy.create_foreach( parallel_policy , functor )
 *  followed by task_policy.spawn(), and returns the resulting future by
 *  value.
 */
template< class ExecSpace , class ParallelPolicyType , class FunctorType >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn_foreach( TaskPolicy<ExecSpace>    & task_policy
             , const ParallelPolicyType & parallel_policy
             , const FunctorType        & functor )
{
  return task_policy.spawn( task_policy.create_foreach( parallel_policy , functor ) );
}
/** \brief  Create and spawn a parallel_reduce task.
 *
 *  Forwards to task_policy.create_reduce( parallel_policy , functor )
 *  followed by task_policy.spawn(), and returns the resulting future by
 *  value.
 */
template< class ExecSpace , class ParallelPolicyType , class FunctorType >
inline
Future< typename FunctorType::value_type , ExecSpace >
spawn_reduce( TaskPolicy<ExecSpace>    & task_policy
            , const ParallelPolicyType & parallel_policy
            , const FunctorType        & functor )
{
  return task_policy.spawn( task_policy.create_reduce( parallel_policy , functor ) );
}
//----------------------------------------------------------------------------
/** \brief  Respawn a task functor with two dependences.
 *
 *  The functor's current dependences are cleared, the two 'before' futures
 *  are added as its new dependences, and the functor is respawned.  Both
 *  'before' futures must have the same Future< Arg0 , Arg1 > type.
 *
 *  NOTE(review): this calls policy.add_dependence(), which the TaskPolicy
 *  declaration in this header does not declare (it declares
 *  set_dependence) - presumably provided by the execution-space specific
 *  TaskPolicy; confirm.
 */
template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 >
inline
void respawn( TaskPolicy<ExecSpace>   & policy
            , FunctorType             * functor
            , const Future<Arg0,Arg1> & before_0
            , const Future<Arg0,Arg1> & before_1
            )
{
  policy.clear_dependence( functor );

  policy.add_dependence( functor , before_0 );
  policy.add_dependence( functor , before_1 );

  policy.respawn( functor );
}
//----------------------------------------------------------------------------
/** \brief Wait on a task policy.
*
* Declaration only; no definition is provided in this header.
* NOTE(review): presumably blocks until the tasks spawned through the
* policy have completed - confirm against the execution-space specific
* implementation.
*/
template< class ExecSpace >
void wait( TaskPolicy< ExecSpace > & );
} /* namespace Experimental */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #define KOKKOS_TASKPOLICY_HPP */

View File

@ -1,217 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_THREADS_HPP
#define KOKKOS_THREADS_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_Layout.hpp>
#include <Kokkos_MemoryTraits.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
class ThreadsExec ;
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
/** \brief Execution space for a pool of Pthreads or C11 threads on a CPU. */
class Threads {
public:
//! \name Type declarations that all Kokkos devices must provide.
//@{
//! Tag this class as a kokkos execution space
typedef Threads execution_space ;
//! Memory space paired with this execution space: Kokkos::HostSpace.
typedef Kokkos::HostSpace memory_space ;
//! This execution space preferred device_type
typedef Kokkos::Device<execution_space,memory_space> device_type;
//! Array layout typedef provided by this execution space: LayoutRight.
typedef Kokkos::LayoutRight array_layout ;
//! Size type, taken from the memory space.
typedef memory_space::size_type size_type ;
//! Scratch memory space parameterized on this execution space.
typedef ScratchMemorySpace< Threads > scratch_memory_space ;
//@}
/*------------------------------------------------------------------------*/
//! \name Static functions that all Kokkos devices must implement.
//@{
/// \brief True if and only if this method is being called in a
/// thread-parallel function.
///
/// The result is returned as an int rather than a bool.
static int in_parallel();
/** \brief Set the device in a "sleep" state.
*
* This function sets the device in a "sleep" state in which it is
* not ready for work. This may consume less resources than if the
* device were in an "awake" state, but it may also take time to
* bring the device from a sleep state to be ready for work.
*
* \return True if the device is in the "sleep" state, else false if
* the device is actively working and could not enter the "sleep"
* state.
*/
static bool sleep();
/// \brief Wake the device from the 'sleep' state so it is ready for work.
///
/// \return True if the device is in the "ready" state, else "false"
/// if the device is actively working (which also means that it's
/// awake).
static bool wake();
/// \brief Wait until all dispatched functors complete.
///
/// The parallel_for or parallel_reduce dispatch of a functor may
/// return asynchronously, before the functor completes. This
/// method does not return until all dispatched functors on this
/// device have completed.
static void fence();
/// \brief Free any resources being consumed by the device.
///
/// For the Threads device, this terminates spawned worker threads.
static void finalize();
/// \brief Print configuration information to the given output stream.
///
/// The second argument defaults to false; it requests a more detailed
/// report (the exact extra content is decided by the implementation).
static void print_configuration( std::ostream & , const bool detail = false );
//@}
/*------------------------------------------------------------------------*/
/*------------------------------------------------------------------------*/
//! \name Space-specific functions
//@{
/** \brief Initialize the device in the "ready to work" state.
*
* The device is initialized in a "ready to work" or "awake" state.
* This state reduces latency and thus improves performance when
* dispatching work. However, the "awake" state consumes resources
* even when no work is being done. You may call sleep() to put
* the device in a "sleeping" state that does not consume as many
* resources, but it will take time (latency) to awaken the device
* again (via the wake()) method so that it is ready for work.
*
* Teams of threads are distributed as evenly as possible across
* the requested number of numa regions and cores per numa region.
* A team will not be split across a numa region.
*
* If the 'use_' arguments are not supplied the hwloc is queried
* to use all available cores.
*
* Every argument has a default (0 for the counts, false for
* allow_asynchronous_threadpool), so initialize() may be called
* with no arguments.
*/
static void initialize( unsigned threads_count = 0 ,
unsigned use_numa_count = 0 ,
unsigned use_cores_per_numa = 0 ,
bool allow_asynchronous_threadpool = false );
/// \brief Query the initialization status, returned as an int.
///
/// NOTE(review): presumably nonzero once initialize() has run and
/// before finalize() -- confirm against the implementation.
static int is_initialized();
/// \brief Return a reference to a Threads object.
///
/// The meaning of the unnamed int argument (default 0) is not visible
/// in this header.
static Threads & instance( int = 0 );
//----------------------------------------
/// \brief Size of the thread pool, as an int.
///
/// NOTE(review): 'depth' (default 0) presumably selects a level of the
/// thread hierarchy -- confirm against the implementation.
static int thread_pool_size( int depth = 0 );
// When the active execution memory space is the host, thread_pool_rank()
// is only declared here and is defined elsewhere; otherwise an inline
// stub that always returns 0 is provided.
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
static int thread_pool_rank();
#else
KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; }
#endif
/// \brief Forwards to thread_pool_size(0); the int result is converted
/// to unsigned.
inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
/// \brief Forwards to thread_pool_rank(); the int result is converted
/// to unsigned.
KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); }
//@}
//----------------------------------------
};
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
/** \brief Specialization of the access-verification trait for the pair
 *         ( Threads::memory_space , Threads::scratch_memory_space ).
 *
 *  The trait's 'value' is true for this pair and both verify() overloads
 *  are empty, i.e. they perform no check.
 */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::Threads::memory_space ,
                                            Kokkos::Threads::scratch_memory_space >
{
  enum { value = true };

  static inline void verify() {}
  static inline void verify( const void * ) {}
};
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
#include <Kokkos_ExecPolicy.hpp>
#include <Kokkos_Parallel.hpp>
#include <Threads/Kokkos_ThreadsExec.hpp>
#include <Threads/Kokkos_ThreadsTeam.hpp>
#include <Threads/Kokkos_Threads_Parallel.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
#endif /* #define KOKKOS_THREADS_HPP */

View File

@ -1,53 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_Vectorization.hpp
/// \brief Declaration and definition of Kokkos::Vectorization interface.
#ifndef KOKKOS_VECTORIZATION_HPP
#define KOKKOS_VECTORIZATION_HPP
#if defined( KOKKOS_HAVE_CUDA )
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,140 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HWLOC_HPP
#define KOKKOS_HWLOC_HPP
#include <utility>
namespace Kokkos {
/** \brief Minimal subset of logical 'hwloc' functionality available
* from http://www.open-mpi.org/projects/hwloc/.
*
* The calls are NOT thread safe in order to avoid mutexes,
* memory allocations, or other actions which could give the
* runtime system an opportunity to migrate the threads or
* touch allocated memory during the function calls.
*
* All calls to these functions should be performed by a thread
* when it has guaranteed exclusive access; e.g., for OpenMP
* within a 'critical' region.
*/
namespace hwloc {
/** \brief Query if hwloc is available.
*
* \return true when hwloc is available, false otherwise.
*/
bool available();
/** \brief Query number of available NUMA regions.
* This will be less than the hardware capacity
* if the MPI process is pinned to a NUMA region.
*/
unsigned get_available_numa_count();
/** \brief Query number of available cores per NUMA region.
* This will be less than the hardware capacity
* if the MPI process is pinned to a set of cores.
*/
unsigned get_available_cores_per_numa();
/** \brief Query number of available "hard" threads per core; i.e., hyperthreads. */
unsigned get_available_threads_per_core();
} /* namespace hwloc */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Internal functions for binding persistent spawned threads.
namespace Kokkos {
namespace hwloc {
/** \brief Recommend mapping of threads onto cores.
*
* If thread_count == 0 then choose and set a value.
* If use_numa_count == 0 then choose and set a value.
* If use_cores_per_numa == 0 then choose and set a value.
*
* The three counts are passed by non-const reference so the chosen
* values can be written back to the caller.
*
* \param label C string supplied by the caller.
* NOTE(review): presumably used to label diagnostics -- confirm.
* \param allow_async Whether an asynchronous mapping may be chosen.
* \param thread_count In/out: requested (or 0) / chosen thread count.
* \param use_numa_count In/out: requested (or 0) / chosen NUMA count.
* \param use_cores_per_numa In/out: requested (or 0) / chosen cores per NUMA.
* \param threads_coord Caller-provided array of coordinate pairs.
* NOTE(review): presumably needs at least
* thread_count entries -- confirm.
*
* Return 0 if asynchronous,
* Return 1 if synchronous and threads_coord[0] is process core
*/
unsigned thread_mapping( const char * const label ,
const bool allow_async ,
unsigned & thread_count ,
unsigned & use_numa_count ,
unsigned & use_cores_per_numa ,
std::pair<unsigned,unsigned> threads_coord[] );
/** \brief Query core-coordinate of the current thread
* with respect to the core_topology.
*
* As long as the thread is running within the
* process binding the following condition holds.
*
* core_coordinate.first < core_topology.first
* core_coordinate.second < core_topology.second
*/
std::pair<unsigned,unsigned> get_this_thread_coordinate();
/** \brief Bind the current thread to a core.
*
* The core is given as a coordinate pair, passed by value.
* \return bool status of the bind request.
*/
bool bind_this_thread( const std::pair<unsigned,unsigned> );
/** \brief Bind the current thread to one of the cores in the list.
* Set that entry to (~0,~0) and return the index.
* If binding fails return ~0.
*
* \param coordinate_count Number of entries in 'coordinate'.
* \param coordinate Candidate coordinates; the entry that is
* used is overwritten in place.
*/
unsigned bind_this_thread( const unsigned coordinate_count ,
std::pair<unsigned,unsigned> coordinate[] );
/** \brief Unbind the current thread back to the original process binding.
*
* \return bool status of the unbind request.
*/
bool unbind_this_thread();
} /* namespace hwloc */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #define KOKKOS_HWLOC_HPP */

View File

@ -1,118 +0,0 @@
# Location of the Kokkos source tree relative to this Makefile.
KOKKOS_PATH = ../..
# Installation prefix; may be overridden from the environment or command line.
PREFIX ?= /usr/local/lib/kokkos
# Default goal: print the start message, build the library, then print the
# end message. It is declared before the include below so that it stays the
# first target make sees.
default: messages build-lib
echo "End Build"
# NOTE(review): the KOKKOS_INTERNAL_USE_* switches, KOKKOS_LINK_DEPENDS and
# the kokkos-clean target are not defined in this file; presumably they come
# from this included Makefile.kokkos.
include $(KOKKOS_PATH)/Makefile.kokkos
# Compiler and linker selection. With CUDA enabled the compiler and linker
# are forced to nvcc_wrapper; otherwise g++ is only a default (?=).
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = nvcc_wrapper
LINKFLAGS ?=
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= g++
LINKFLAGS ?=
endif
# Directory make was started in; used when rewriting -L paths for install.
PWD = $(shell pwd)
# Public headers (core, containers, algorithms) and implementation headers
# (core/impl, containers/impl) that the install target copies.
KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
# Back-end specific header lists. Each enabled back end also appends its
# copy-<backend> target to CONDITIONAL_COPIES, which 'install' depends on.
CONDITIONAL_COPIES =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
CONDITIONAL_COPIES += copy-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
CONDITIONAL_COPIES += copy-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
CONDITIONAL_COPIES += copy-openmp
endif
# Print the banner shown at the start of the default build.
messages:
echo "Start Build"
# Generate a stand-alone Makefile.kokkos describing the installed library.
# Step 1: delete any previous copy; every echo below appends (>>), so the
#         file must start out absent.
# Step 2: record the global settings, source/header lists and the flag
#         variables as they are expanded in this build.
# Step 3: rewrite source-tree paths (core/containers/algorithms src, the
#         -L$(PWD) link path, libkokkos.a and KokkosCore_config.h) so they
#         point into $(PREFIX), going through a temporary file.
build-makefile-kokkos:
rm -f Makefile.kokkos
echo "#Global Settings used to generate this library" >> Makefile.kokkos
echo "KOKKOS_PATH = $(PREFIX)" >> Makefile.kokkos
echo "KOKKOS_DEVICES = $(KOKKOS_DEVICES)" >> Makefile.kokkos
echo "KOKKOS_ARCH = $(KOKKOS_ARCH)" >> Makefile.kokkos
echo "KOKKOS_DEBUG = $(KOKKOS_DEBUG)" >> Makefile.kokkos
echo "KOKKOS_USE_TPLS = $(KOKKOS_USE_TPLS)" >> Makefile.kokkos
echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos
echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos
echo "CXX ?= $(CXX)" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos
echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos
echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos
echo "" >> Makefile.kokkos
echo "#Variables used in application Makefiles" >> Makefile.kokkos
echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos
echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos
echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos
echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos
echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos
echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos
sed \
-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
-e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' Makefile.kokkos \
> Makefile.kokkos.tmp
mv -f Makefile.kokkos.tmp Makefile.kokkos
# Build the library: this target has no recipe of its own, it only pulls in
# the generated Makefile.kokkos and whatever $(KOKKOS_LINK_DEPENDS) names.
build-lib: build-makefile-kokkos $(KOKKOS_LINK_DEPENDS)
# Create the installation tree under $(PREFIX): the prefix itself plus its
# include, lib and include/impl sub-directories.
mkdir:
	mkdir -p $(PREFIX) \
	         $(PREFIX)/include \
	         $(PREFIX)/lib \
	         $(PREFIX)/include/impl
# Back-end header copies. Each target creates its own sub-directory of
# $(PREFIX)/include and copies that back end's headers into it. They are
# reached through $(CONDITIONAL_COPIES), which lists only the back ends
# whose KOKKOS_INTERNAL_USE_* switch equals 1.
copy-cuda: mkdir
mkdir -p $(PREFIX)/include/Cuda
cp $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda
copy-threads: mkdir
mkdir -p $(PREFIX)/include/Threads
cp $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads
copy-openmp: mkdir
mkdir -p $(PREFIX)/include/OpenMP
cp $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP
# Install headers, the generated Makefile.kokkos, the static library and the
# configuration header under $(PREFIX). Depends on the directory tree, the
# enabled back-end copies and a completed library build.
install: mkdir $(CONDITIONAL_COPIES) build-lib
cp $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
cp $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
cp Makefile.kokkos $(PREFIX)
cp libkokkos.a $(PREFIX)/lib
cp KokkosCore_config.h $(PREFIX)/include
# Remove build products: run kokkos-clean, then delete the generated
# Makefile.kokkos. 'rm -f' is used (as in build-makefile-kokkos) so that
# 'make clean' does not fail when Makefile.kokkos was never generated or has
# already been removed.
# NOTE(review): kokkos-clean is not defined in this file; presumably it is
# provided by the included $(KOKKOS_PATH)/Makefile.kokkos.
clean: kokkos-clean
	rm -f Makefile.kokkos

View File

@ -1,496 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_OPENMP_PARALLEL_HPP
#define KOKKOS_OPENMP_PARALLEL_HPP
#include <omp.h>
#include <Kokkos_Parallel.hpp>
#include <OpenMP/Kokkos_OpenMPexec.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, const PType & range )
{
const typename PType::member_type work_end = range.end();
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
functor( iwork );
}
}
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, const PType & range )
{
const typename PType::member_type work_end = range.end();
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
functor( typename PType::work_tag() , iwork );
}
}
public:
inline
ParallelFor( const FunctorType & functor
, const Policy & policy )
{
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for");
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for");
#pragma omp parallel
{
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
driver( functor , typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() ) );
}
/* END #pragma omp parallel */
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
typedef typename Policy::work_tag WorkTag ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, reference_type update
, const PType & range )
{
const typename PType::member_type work_end = range.end();
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
functor( iwork , update );
}
}
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, reference_type update
, const PType & range )
{
const typename PType::member_type work_end = range.end();
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
functor( typename PType::work_tag() , iwork , update );
}
}
public:
//----------------------------------------
template< class ViewType >
inline
ParallelReduce( typename Impl::enable_if<
( Impl::is_view< ViewType >::value &&
Impl::is_same< typename ViewType::memory_space , HostSpace >::value
), const FunctorType & >::type functor
, const Policy & policy
, const ViewType & result_view )
{
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce");
OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , 0 );
#pragma omp parallel
{
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
driver( functor
, ValueInit::init( functor , exec.scratch_reduce() )
, typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
);
}
/* END #pragma omp parallel */
{
const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
ValueJoin::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
}
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
if ( result_view.ptr_on_device() ) {
const int n = ValueTraits::value_count( functor );
for ( int j = 0 ; j < n ; ++j ) { result_view.ptr_on_device()[j] = ptr[j] ; }
}
}
}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief parallel_scan dispatch for a Kokkos::RangePolicy on Kokkos::OpenMP.
*
* The work is performed by the constructor in three steps:
*  1. a parallel pass (final == false) in which every thread accumulates
*     its share of the range into the second half of its reduce scratch;
*  2. a serial pass over the thread pool that turns those per-thread
*     values into per-thread starting values stored in the first half of
*     each thread's scratch;
*  3. a second parallel pass (final == true) in which every thread re-runs
*     its share of the range starting from the value in the first half.
*
* Each thread's reduce scratch therefore holds two values of
* ValueTraits::value_count( functor ) elements each.
*/
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
{
private:
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
typedef typename Policy::work_tag WorkTag ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
typedef Kokkos::Impl::FunctorValueOps< FunctorType , WorkTag > ValueOps ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
// Selected when the policy's work_tag is void:
// calls functor( index , update , final ) for every index of 'range'.
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, reference_type update
, const PType & range
, const bool final )
{
const typename PType::member_type work_end = range.end();
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
functor( iwork , update , final );
}
}
// Selected when the policy's work_tag is not void:
// calls functor( work_tag() , index , update , final ).
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, reference_type update
, const PType & range
, const bool final )
{
const typename PType::member_type work_end = range.end();
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
functor( typename PType::work_tag() , iwork , update , final );
}
}
public:
//----------------------------------------
/** \brief Run the scan of 'functor' over the range described by 'policy'. */
inline
ParallelScan( const FunctorType & functor
, const Policy & policy )
{
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan");
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan");
// Two values per thread: the first half holds the thread's starting value,
// the second half its accumulated value.
OpenMPexec::resize_scratch( 2 * ValueTraits::value_size( functor ) , 0 );
// Pass 1: accumulate each thread's share into the second half of its
// scratch, with final == false.
#pragma omp parallel
{
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
driver( functor
, ValueInit::init( functor , pointer_type( exec.scratch_reduce() ) + ValueTraits::value_count( functor ) )
, typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
, false );
}
/* END #pragma omp parallel */
{
const unsigned thread_count = OpenMPexec::pool_size();
const unsigned value_count = ValueTraits::value_count( functor );
pointer_type ptr_prev = 0 ;
// Serial pass: visit pool_rev( thread_count - 1 ) down to pool_rev( 0 ).
// NOTE(review): pool_rev presumably indexes the pool in reverse rank order,
// which would make this walk ascend in pool rank -- confirm in OpenMPexec.
for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) {
pointer_type ptr = pointer_type( OpenMPexec::pool_rev(rank_rev)->scratch_reduce() );
if ( ptr_prev ) {
// First half <- second half of the previously visited thread, then fold
// that starting value into this thread's own second half.
for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; }
ValueJoin::join( functor , ptr + value_count , ptr );
}
else {
// First thread visited: its starting value is the functor's initial value.
ValueInit::init( functor , ptr );
}
ptr_prev = ptr ;
}
}
// Pass 2: re-run each thread's share with final == true, updating the value
// stored in the first half of its scratch.
#pragma omp parallel
{
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
driver( functor
, ValueOps::reference( pointer_type( exec.scratch_reduce() ) )
, typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
, true );
}
/* END #pragma omp parallel */
}
//----------------------------------------
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< class FunctorType , class Arg0 , class Arg1 >
class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > Policy ;
template< class TagType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
const FunctorType & >::type functor
, const typename Policy::member_type & member )
{ functor( member ); }
template< class TagType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
const FunctorType & >::type functor
, const typename Policy::member_type & member )
{ functor( TagType() , member ); }
public:
inline
ParallelFor( const FunctorType & functor ,
const Policy & policy )
{
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for");
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for");
const size_t team_reduce_size = Policy::member_type::team_reduce_size();
const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
OpenMPexec::resize_scratch( 0 , team_reduce_size + team_shmem_size );
#pragma omp parallel
{
typename Policy::member_type member( * OpenMPexec::get_thread_omp() , policy , team_shmem_size );
for ( ; member.valid() ; member.next() ) {
ParallelFor::template driver< typename Policy::work_tag >( functor , member );
}
}
/* END #pragma omp parallel */
}
void wait() {}
};
template< class FunctorType , class Arg0 , class Arg1 >
class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > >
{
private:
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > Policy ;
typedef typename Policy::work_tag WorkTag ;
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
typedef typename ValueTraits::pointer_type pointer_type ;
typedef typename ValueTraits::reference_type reference_type ;
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, const typename PType::member_type & member
, reference_type update )
{ functor( member , update ); }
template< class PType >
KOKKOS_FORCEINLINE_FUNCTION static
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
const FunctorType & >::type functor
, const typename PType::member_type & member
, reference_type update )
{ functor( typename PType::work_tag() , member , update ); }
public:
inline
ParallelReduce( const FunctorType & functor ,
const Policy & policy )
{
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
const size_t team_reduce_size = Policy::member_type::team_reduce_size();
const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , team_reduce_size + team_shmem_size );
#pragma omp parallel
{
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
reference_type update = ValueInit::init( functor , exec.scratch_reduce() );
for ( typename Policy::member_type member( exec , policy , team_shmem_size ); member.valid() ; member.next() ) {
ParallelReduce::template driver< Policy >( functor , member , update );
}
}
/* END #pragma omp parallel */
{
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag , reference_type > Join ;
const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
Join::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
}
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
}
}
template< class ViewType >
inline
ParallelReduce( const FunctorType & functor ,
const Policy & policy ,
const ViewType & result )
{
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
const size_t team_reduce_size = Policy::member_type::team_reduce_size();
const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , team_reduce_size + team_shmem_size );
#pragma omp parallel
{
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
reference_type update = ValueInit::init( functor , exec.scratch_reduce() );
for ( typename Policy::member_type member( exec , policy , team_shmem_size ); member.valid() ; member.next() ) {
ParallelReduce::template driver< Policy >( functor , member , update );
}
}
/* END #pragma omp parallel */
{
const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
ValueJoin::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
}
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
const int n = ValueTraits::value_count( functor );
for ( int j = 0 ; j < n ; ++j ) { result.ptr_on_device()[j] = ptr[j] ; }
}
}
void wait() {}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* KOKKOS_OPENMP_PARALLEL_HPP */

View File

@ -1,364 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <limits>
#include <iostream>
#include <vector>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <iostream>
#ifdef KOKKOS_HAVE_OPENMP
namespace Kokkos {
namespace Impl {
// File-local helpers for the OpenMP back end.
namespace {
// Forward declaration: the function must be nameable in the initializer of
// 'kokkos_omp_in_critical_region' below, before its body is defined.
KOKKOS_INLINE_FUNCTION
int kokkos_omp_in_parallel();
// Flag consulted by kokkos_omp_in_parallel(). Its initializer is a comma
// expression: it first registers kokkos_omp_in_parallel with
// Kokkos::HostSpace::register_in_parallel and then yields 0, so the
// registration happens as a side effect of static initialization.
int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 );
// Returns non-zero when the caller is inside an OpenMP parallel region and
// 'kokkos_omp_in_critical_region' is zero. When compiled for a CUDA device
// (__CUDA_ARCH__ defined) it always returns 0.
KOKKOS_INLINE_FUNCTION
int kokkos_omp_in_parallel()
{
#ifndef __CUDA_ARCH__
return omp_in_parallel() && ! kokkos_omp_in_critical_region ;
#else
return 0;
#endif
}
// True when hwloc is used for thread binding; written by
// OpenMP::initialize and read by OpenMP::finalize / print_configuration.
bool s_using_hwloc = false;
} // namespace
} // namespace Impl
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
// Definitions of OpenMPexec's static data members.
// m_map_rank: indexed by omp_get_thread_num(); holds the reversed pool rank
// assigned to that OpenMP thread in OpenMP::initialize.
int OpenMPexec::m_map_rank[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 };
// m_pool_topo: thread pool topology; entries [0..2] are set in
// OpenMP::initialize and cleared in OpenMP::finalize.
int OpenMPexec::m_pool_topo[ 4 ] = { 0 };
// m_pool: per-thread scratch allocations, indexed by reversed pool rank.
OpenMPexec::Pool OpenMPexec::m_pool;
// Guard for operations that must be issued from outside any OpenMP parallel
// region. If the caller is inside one, a runtime exception is raised whose
// message is 'label' followed by " ERROR: in parallel".
void OpenMPexec::verify_is_process( const char * const label )
{
  if ( ! omp_in_parallel() ) { return ; }

  std::string message( label );
  message += " ERROR: in parallel" ;

  Kokkos::Impl::throw_runtime_exception( message );
}
// Guard for operations that require an initialized thread pool. The pool is
// considered initialized when slot 0 holds an allocation; otherwise a runtime
// exception is raised whose message is 'label' followed by
// " ERROR: not initialized".
void OpenMPexec::verify_initialized( const char * const label )
{
  if ( 0 != m_pool[0] ) { return ; }

  std::string message( label );
  message += " ERROR: not initialized" ;

  Kokkos::Impl::throw_runtime_exception( message );
}
// Release the scratch allocation of every pool thread. Each OpenMP thread
// clears the tracker stored at its own reversed pool rank.
void OpenMPexec::clear_scratch()
{
#pragma omp parallel
  {
    m_pool.at( m_map_rank[ omp_get_thread_num() ] ).clear();
  }
/* END #pragma omp parallel */
}
// Ensure every pool thread owns a scratch allocation with at least
// 'reduce_size' bytes of reduction scratch and 'thread_size' bytes of
// thread/team scratch.
//
// Allocation layout (per thread):
//   [ OpenMPexec header : ALLOC_EXEC ][ reduce scratch ][ thread scratch ]
//
// Both requests are rounded up to the memory alignment. Nothing happens when
// the current allocation already satisfies both requests; otherwise all
// threads' allocations are released and re-created, never shrinking either
// region below its current size.
//
// Fix: the current reduce capacity is m_scratch_reduce_end - m_scratch_exec_end.
// The previous code used m_scratch_reduce_end alone, an *end offset* that also
// contains the ALLOC_EXEC header. That over-estimated the capacity, so a
// request slightly larger than the real capacity was not reallocated and the
// reduce scratch could overrun into the thread scratch. It also inflated the
// reduce region by ALLOC_EXEC on every reallocation.
void OpenMPexec::resize_scratch( size_t reduce_size , size_t thread_size )
{
  enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 };
  enum { ALLOC_EXEC = ( sizeof(OpenMPexec) + ALIGN_MASK ) & ~ALIGN_MASK };

  // Capacities of the current allocation (zero when nothing is allocated yet).
  const size_t old_reduce_size = m_pool[0] ? m_pool[0]->m_scratch_reduce_end - m_pool[0]->m_scratch_exec_end : 0 ;
  const size_t old_thread_size = m_pool[0] ? m_pool[0]->m_scratch_thread_end - m_pool[0]->m_scratch_reduce_end : 0 ;

  // Round the requests up to the alignment boundary.
  reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ;
  thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ;

  // Requesting allocation and old allocation is too small:
  const bool allocate = ( old_reduce_size < reduce_size ) ||
                        ( old_thread_size < thread_size );

  if ( allocate ) {
    // Never shrink a region that is already large enough.
    if ( reduce_size < old_reduce_size ) { reduce_size = old_reduce_size ; }
    if ( thread_size < old_thread_size ) { thread_size = old_thread_size ; }
  }

  const size_t alloc_size = allocate ? ALLOC_EXEC + reduce_size + thread_size : 0 ;
  const int    pool_size  = m_pool_topo[0] ;

  if ( allocate ) {

    clear_scratch();

    // Each thread allocates its own scratch and constructs its OpenMPexec
    // header in place at the start of that allocation.
#pragma omp parallel
    {
      const int rank_rev = m_map_rank[ omp_get_thread_num() ];
      const int rank     = pool_size - ( rank_rev + 1 );

      m_pool.at(rank_rev) = HostSpace::allocate_and_track( "openmp_scratch", alloc_size );
      new ( m_pool[ rank_rev ] ) OpenMPexec( rank , ALLOC_EXEC , reduce_size , thread_size );
    }
/* END #pragma omp parallel */
  }
}
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
// Returns 1 when the thread pool has been set up (pool slot 0 holds an
// allocation) and 0 otherwise.
int OpenMP::is_initialized()
{
  const bool have_pool = ( Impl::OpenMPexec::m_pool[0] != 0 );
  return have_pool ;
}
// Initialize the OpenMP back end.
//
// thread_count       : number of threads to use; 0 selects a default (the hwloc
//                      capacity when hwloc binding is used, otherwise the number
//                      of threads observed in a first parallel region).
// use_numa_count     : passed to hwloc::thread_mapping and used to derive the
//                      pool topology when hwloc binding is used.
// use_cores_per_numa : likewise.
//
// Throws a runtime exception when the back end is already initialized or when
// the requested number of threads could not be spawned.
void OpenMP::initialize( unsigned thread_count ,
unsigned use_numa_count ,
unsigned use_cores_per_numa )
{
// Before any other call to OMP query the maximum number of threads
// and save the value for re-initialization unit testing.
// Using omp_get_max_threads() is problematic in conjunction with
// hwloc on Intel: an initial call to the OpenMP runtime without a
// preceding parallel region sets a process mask for a single core.
// The runtime will then bind threads for a parallel region to other cores on
// entering the first parallel region and make the process mask the aggregate of
// the thread masks. The intent seems to be to make serial code run fast if you
// compile with OpenMP enabled but don't actually use parallel regions.
// Hence the thread count is obtained by counting inside a parallel region
// instead of:
//static int omp_max_threads = omp_get_max_threads();
int nthreads = 0;
#pragma omp parallel
{
#pragma omp atomic
nthreads++;
}
// Function-local static: keeps the count measured on the first call.
static int omp_max_threads = nthreads;
const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ;
bool thread_spawn_failed = false ;
if ( ! is_initialized ) {
// Use hwloc thread pinning if concerned with locality,
// i.e. when spreading threads across multiple NUMA regions
// or when hyperthreading is enabled.
Impl::s_using_hwloc = hwloc::available() && (
( 1 < Kokkos::hwloc::get_available_numa_count() ) ||
( 1 < Kokkos::hwloc::get_available_threads_per_core() ) );
std::pair<unsigned,unsigned> threads_coord[ Impl::OpenMPexec::MAX_THREAD_COUNT ];
// If hwloc is in use then default to its maximum value.
if ( thread_count == 0 ) {
thread_count = Impl::s_using_hwloc
? Kokkos::hwloc::get_available_numa_count() *
Kokkos::hwloc::get_available_cores_per_numa() *
Kokkos::hwloc::get_available_threads_per_core()
: omp_max_threads ;
}
// NOTE(review): thread_mapping presumably fills 'threads_coord' and may adjust
// the count arguments - its signature is not visible here, confirm.
if(Impl::s_using_hwloc)
hwloc::thread_mapping( "Kokkos::OpenMP::initialize" ,
false /* do not allow asynchronous */ ,
thread_count ,
use_numa_count ,
use_cores_per_numa ,
threads_coord );
// Spawn threads:
omp_set_num_threads( thread_count );
// Verify OMP interaction:
if ( int(thread_count) != omp_get_max_threads() ) {
thread_spawn_failed = true ;
}
// Verify spawning and bind threads:
#pragma omp parallel
{
#pragma omp critical
{
if ( int(thread_count) != omp_get_num_threads() ) {
thread_spawn_failed = true ;
}
// Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region.
// Call to 'new' may not be thread safe as well.
// Reverse the rank for threads so that the scan operation reduces to the highest rank thread.
const unsigned omp_rank = omp_get_thread_num();
const unsigned thread_r = Impl::s_using_hwloc ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) : omp_rank ;
Impl::OpenMPexec::m_map_rank[ omp_rank ] = thread_r ;
}
/* END #pragma omp critical */
}
/* END #pragma omp parallel */
if ( ! thread_spawn_failed ) {
// Pool topology: [0] total threads, [1] threads per NUMA region,
// [2] threads per core (1 when hwloc is not used).
// NOTE(review): the divisions below require non-zero use_numa_count and
// use_cores_per_numa when hwloc is used - confirm that thread_mapping
// guarantees this.
Impl::OpenMPexec::m_pool_topo[0] = thread_count ;
Impl::OpenMPexec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count;
Impl::OpenMPexec::m_pool_topo[2] = Impl::s_using_hwloc ? thread_count / ( use_numa_count * use_cores_per_numa ) : 1;
// Initial scratch allocation: 1024 bytes each for reduce and thread scratch.
Impl::OpenMPexec::resize_scratch( 1024 , 1024 );
}
}
// Report failures only after the parallel regions above have completed.
if ( is_initialized || thread_spawn_failed ) {
std::string msg("Kokkos::OpenMP::initialize ERROR");
if ( is_initialized ) { msg.append(" : already initialized"); }
if ( thread_spawn_failed ) { msg.append(" : failed spawning threads"); }
Kokkos::Impl::throw_runtime_exception(msg);
}
// Initialize the lock array used for arbitrarily sized atomics
Impl::init_lock_array_host_space();
}
//----------------------------------------------------------------------------
// Shut down the OpenMP back end: release all per-thread scratch memory,
// reset the recorded pool topology, drop the OpenMP thread count to one and,
// when hwloc binding was in use, unbind the calling thread.
// Must be called on an initialized back end from outside a parallel region;
// otherwise the verification helpers throw.
void OpenMP::finalize()
{
  Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" );
  Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" );

  Impl::OpenMPexec::clear_scratch();

  // Only the three topology entries that initialize() sets are cleared.
  for ( int depth = 0 ; depth < 3 ; ++depth ) {
    Impl::OpenMPexec::m_pool_topo[ depth ] = 0 ;
  }

  omp_set_num_threads(1);

  if ( ! Impl::s_using_hwloc ) { return ; }

  hwloc::unbind_this_thread();
}
//----------------------------------------------------------------------------
// Write a one-line summary of the OpenMP back end configuration to 's'.
// When the back end is initialized the thread pool topology is included and,
// if 'detail' is set, one extra line per thread with its OpenMP rank, Kokkos
// rank and hwloc coordinate. Must be called from outside a parallel region.
void OpenMP::print_configuration( std::ostream & s , const bool detail )
{
  Impl::OpenMPexec::verify_is_process( "OpenMP::print_configuration" );

  s << "Kokkos::OpenMP" ;

#if defined( KOKKOS_HAVE_OPENMP )
  s << " KOKKOS_HAVE_OPENMP" ;
#endif
#if defined( KOKKOS_HAVE_HWLOC )
  const unsigned hw_numa    = Kokkos::hwloc::get_available_numa_count();
  const unsigned hw_cores   = Kokkos::hwloc::get_available_cores_per_numa();
  const unsigned hw_threads = Kokkos::hwloc::get_available_threads_per_core();

  s << " hwloc[" << hw_numa << "x" << hw_cores << "x" << hw_threads << "]" ;
  s << " hwloc_binding_" << ( Impl::s_using_hwloc ? "enabled" : "disabled" ) ;
#endif

  // Nothing more to report for an uninitialized back end.
  if ( 0 == Impl::OpenMPexec::m_pool[0] ) {
    s << " not initialized" << std::endl ;
    return ;
  }

  // Derive the NUMA x core x thread shape from the stored pool sizes.
  const int pool_numa_count       = Kokkos::Impl::OpenMPexec::m_pool_topo[0] / Kokkos::Impl::OpenMPexec::m_pool_topo[1] ;
  const int pool_cores_per_numa   = Kokkos::Impl::OpenMPexec::m_pool_topo[1] / Kokkos::Impl::OpenMPexec::m_pool_topo[2] ;
  const int pool_threads_per_core = Kokkos::Impl::OpenMPexec::m_pool_topo[2] ;

  s << " thread_pool_topology[ " << pool_numa_count ;
  s << " x " << pool_cores_per_numa ;
  s << " x " << pool_threads_per_core ;
  s << " ]" << std::endl ;

  if ( ! detail ) { return ; }

  // Collect each thread's hwloc coordinate, one thread at a time.
  std::vector< std::pair<unsigned,unsigned> > coord( Kokkos::Impl::OpenMPexec::m_pool_topo[0] );

#pragma omp parallel
  {
#pragma omp critical
    {
      coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate();
    }
/* END #pragma omp critical */
  }
/* END #pragma omp parallel */

  for ( unsigned omp_rank = 0 ; omp_rank < coord.size() ; ++omp_rank ) {
    s << " thread omp_rank[" << omp_rank << "]" ;
    s << " kokkos_rank[" << Impl::OpenMPexec::m_map_rank[ omp_rank ] << "]" ;
    s << " hwloc_coord[" << coord[omp_rank].first << "." << coord[omp_rank].second << "]" ;
    s << std::endl ;
  }
}
} // namespace Kokkos
#endif //KOKKOS_HAVE_OPENMP

View File

@ -1,767 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_OPENMPEXEC_HPP
#define KOKKOS_OPENMPEXEC_HPP
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_spinwait.hpp>
#include <impl/Kokkos_AllocationTracker.hpp>
#include <Kokkos_Atomic.hpp>
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
/** \brief Data for OpenMP thread execution
 *
 * One OpenMPexec object is constructed in place at the start of each pool
 * thread's scratch allocation (see resize_scratch). The object records byte
 * offsets, relative to 'this', of the scratch regions that follow it:
 *   [ 0 , m_scratch_exec_end )                     : this header
 *   [ m_scratch_exec_end , m_scratch_reduce_end )  : reduction scratch
 *   [ m_scratch_reduce_end , m_scratch_thread_end ): thread / team scratch
 */
class OpenMPexec {
public:
// Capacity of the static per-thread tables below.
enum { MAX_THREAD_COUNT = 4096 };
// Table of per-thread allocation trackers, indexed by reversed pool rank.
struct Pool
{
Pool() : m_trackers() {}
AllocationTracker m_trackers[ MAX_THREAD_COUNT ];
// Pointer to the OpenMPexec stored at the start of allocation 'i'
// (the tracker's alloc_ptr() reinterpreted as OpenMPexec *).
OpenMPexec * operator[](int i)
{
return reinterpret_cast<OpenMPexec *>(m_trackers[i].alloc_ptr());
}
// Access the tracker itself, e.g. to assign or clear the allocation.
AllocationTracker & at(int i)
{
return m_trackers[i];
}
};
private:
// Pool sizes per depth; see the topology comment on pool_size() below.
static int m_pool_topo[ 4 ];
// Maps omp_get_thread_num() to the reversed pool rank (see get_thread_omp).
static int m_map_rank[ MAX_THREAD_COUNT ];
static Pool m_pool; // Indexed by: m_pool_rank_rev
friend class Kokkos::OpenMP ;
int const m_pool_rank ;
int const m_pool_rank_rev ;
// End offsets, in bytes from 'this', of the header, reduce and thread regions.
int const m_scratch_exec_end ;
int const m_scratch_reduce_end ;
int const m_scratch_thread_end ;
// Flag read and written through state_wait() / state_set().
int volatile m_barrier_state ;
// Declared private and not defined in the visible source.
// NOTE(review): presumably to forbid default construction and copying - confirm.
OpenMPexec();
OpenMPexec( const OpenMPexec & );
OpenMPexec & operator = ( const OpenMPexec & );
static void clear_scratch();
public:
// Topology of a cache coherent thread pool:
// TOTAL = NUMA x GRAIN
// pool_size( depth = 0 )
// pool_size(0) = total number of threads
// pool_size(1) = number of threads per NUMA
// pool_size(2) = number of threads sharing finest grain memory hierarchy
inline static
int pool_size( int depth = 0 ) { return m_pool_topo[ depth ]; }
// OpenMPexec of the thread with the given reversed pool rank.
inline static
OpenMPexec * pool_rev( int pool_rank_rev ) { return m_pool[ pool_rank_rev ]; }
inline int pool_rank() const { return m_pool_rank ; }
inline int pool_rank_rev() const { return m_pool_rank_rev ; }
// Start of the reduction scratch: immediately after the header.
inline void * scratch_reduce() const { return ((char *) this) + m_scratch_exec_end ; }
// Start of the thread / team scratch: immediately after the reduction scratch.
inline void * scratch_thread() const { return ((char *) this) + m_scratch_reduce_end ; }
// Forwards to Impl::spinwait on this thread's barrier flag.
// NOTE(review): spinwait presumably blocks while the flag equals 'state' - confirm.
inline
void state_wait( int state )
{ Impl::spinwait( m_barrier_state , state ); }
inline
void state_set( int state ) { m_barrier_state = state ; }
~OpenMPexec() {}
// poolRank            : forward rank of this thread in the pool.
// scratch_exec_size   : bytes reserved for this header.
// scratch_reduce_size : bytes of reduction scratch.
// scratch_thread_size : bytes of thread / team scratch.
// The reversed rank is derived from the current pool_size(), and the three
// sizes are accumulated into end offsets.
OpenMPexec( const int poolRank
, const int scratch_exec_size
, const int scratch_reduce_size
, const int scratch_thread_size )
: m_pool_rank( poolRank )
, m_pool_rank_rev( pool_size() - ( poolRank + 1 ) )
, m_scratch_exec_end( scratch_exec_size )
, m_scratch_reduce_end( m_scratch_exec_end + scratch_reduce_size )
, m_scratch_thread_end( m_scratch_reduce_end + scratch_thread_size )
, m_barrier_state(0)
{}
// Declared here; no definition appears in the visible part of the sources.
static void finalize();
static void initialize( const unsigned team_count ,
const unsigned threads_per_team ,
const unsigned numa_count ,
const unsigned cores_per_numa );
// Throw a runtime exception (message prefixed by the given label) when called
// inside a parallel region / when the pool is not initialized, respectively.
static void verify_is_process( const char * const );
static void verify_initialized( const char * const );
// Grow the per-thread scratch allocations to at least the given sizes.
static void resize_scratch( size_t reduce_size , size_t thread_size );
// OpenMPexec belonging to the calling OpenMP thread.
inline static
OpenMPexec * get_thread_omp() { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Per-thread handle describing the thread's position within its
 *          team and league, passed to team-policy functors on OpenMP.
 *
 *  All "rev" quantities are reversed ranks: reversed team rank 0 is the team
 *  base, which is the root of the fan-in / fan-out barrier and whose
 *  scratch_thread() buffer holds the team-wide reduction slot (the first
 *  TEAM_REDUCE_SIZE bytes) followed by the team shared memory.
 */
class OpenMPexecTeamMember {
private:

  /** \brief  Bytes at the start of the base thread's scratch reserved for
   *          team collectives (broadcast / reduce / scan values). */
  enum { TEAM_REDUCE_SIZE = 512 };

  /** \brief  Thread states for team synchronization */
  enum { Active = 0 , Rendezvous = 1 };

  typedef Kokkos::OpenMP                         execution_space ;
  typedef execution_space::scratch_memory_space  scratch_memory_space ;

  Impl::OpenMPexec    & m_exec ;          // executing thread's pool entry
  scratch_memory_space  m_team_shared ;   // team shared memory handle
  int                   m_team_shmem ;    // team shared memory size in bytes
  int                   m_team_base_rev ; // reversed pool rank of the team base
  int                   m_team_rank_rev ; // reversed rank within the team
  int                   m_team_rank ;     // forward rank within the team
  int                   m_team_size ;
  int                   m_league_rank ;   // league iteration currently executed
  int                   m_league_end ;    // one past this team's last league iteration
  int                   m_league_size ;

  // Fan-in team threads, root of the fan-in which does not block returns true
  inline
  bool team_fan_in() const
    {
      // Wait on the team members at reversed ranks rank+1, rank+2, rank+4, ...
      // for as long as the corresponding bit of this thread's reversed rank
      // is clear.
      for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {
        m_exec.pool_rev( m_team_base_rev + j )->state_wait( Active );
      }

      // Every thread except the root announces its arrival and then waits
      // for its state to be changed again by team_fan_out().
      if ( m_team_rank_rev ) {
        m_exec.state_set( Rendezvous );
        m_exec.state_wait( Rendezvous );
      }

      return 0 == m_team_rank_rev ;
    }

  // Release the threads this thread waited on in team_fan_in().
  inline
  void team_fan_out() const
    {
      for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {
        m_exec.pool_rev( m_team_base_rev + j )->state_set( Active );
      }
    }

public:

  KOKKOS_INLINE_FUNCTION
  const execution_space::scratch_memory_space & team_shmem() const
    { return m_team_shared ; }

  KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
  KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
  KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
  KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }

  /** \brief  Barrier across the team; a no-op for a team of one thread
   *          and when not compiling for the host memory space. */
  KOKKOS_INLINE_FUNCTION void team_barrier() const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    {}
#else
    {
      if ( 1 < m_team_size ) {
        team_fan_in();
        team_fan_out();
      }
    }
#endif

  /** \brief  Broadcast 'value' from the team member whose team_rank() equals
   *          'thread_id' to every member of the team.
   *
   *  The value travels through the team base thread's scratch buffer, the
   *  same location team_reduce() uses for the team value. Previously every
   *  thread used its *own* scratch_thread() buffer, so the source wrote to a
   *  location no other thread read and all other members received stale data.
   *
   *  A second barrier after the read keeps any member from reusing the
   *  shared slot (e.g. in a following collective) before all members have
   *  read the broadcast value.
   */
  template<class ValueType>
  KOKKOS_INLINE_FUNCTION
  void team_broadcast(ValueType& value, const int& thread_id) const
  {
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    { }
#else
    // Make sure there is enough scratch space:
    // ('type' becomes void, and the code below fails to compile, otherwise)
    typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
                         , ValueType , void >::type type ;

    // Slot shared by the whole team: the base thread's scratch.
    type * const shared_value =
      ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread());

    if(team_rank() == thread_id)
      *shared_value = value;

    memory_fence();
    team_barrier();   // wait for the source thread to publish its value
    value = *shared_value;
    team_barrier();   // wait for every member to read before the slot is reused
#endif
  }

  /** \brief  Team-wide reduction of 'value' with the given join operation;
   *          every team member receives the reduced value. */
#ifdef KOKKOS_HAVE_CXX11
  template< class ValueType, class JoinOp >
  KOKKOS_INLINE_FUNCTION ValueType
    team_reduce( const ValueType & value
               , const JoinOp & op_in ) const
  #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    { return ValueType(); }
  #else
    {
      typedef ValueType value_type;
      const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
  #endif
#else // KOKKOS_HAVE_CXX11
  template< class JoinOp >
  KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
    team_reduce( const typename JoinOp::value_type & value
               , const JoinOp & op ) const
  #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    { return typename JoinOp::value_type(); }
  #else
    {
      typedef typename JoinOp::value_type value_type;
  #endif
#endif // KOKKOS_HAVE_CXX11
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
      // Make sure there is enough scratch space:
      typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
                           , value_type , void >::type type ;

      type * const local_value = ((type*) m_exec.scratch_thread());

      // Set this thread's contribution
      *local_value = value ;

      // Fence to make sure the base team member has access:
      memory_fence();

      if ( team_fan_in() ) {
        // The last thread to synchronize returns true, all other threads wait for team_fan_out()
        type * const team_value = ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread());

        // Join to the team value:
        for ( int i = 1 ; i < m_team_size ; ++i ) {
          op.join( *team_value , *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) );
        }

        // The base team member may "lap" the other team members,
        // copy to their local value before proceeding.
        for ( int i = 1 ; i < m_team_size ; ++i ) {
          *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) = *team_value ;
        }

        // Fence to make sure all team members have access
        memory_fence();
      }

      team_fan_out();

      return *((type volatile const *)local_value);
    }
#endif

  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering
   *          with intra-team non-deterministic ordering accumulation.
   *
   *  The global inter-team accumulation value will, at the end of the
   *  league's parallel execution, be the scan's total.
   *  Parallel execution ordering of the league's teams is non-deterministic.
   *  As such the base value for each team's scan operation is similarly
   *  non-deterministic.
   */
  template< typename ArgType >
  KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    { return ArgType(); }
#else
    {
      // Make sure there is enough scratch space:
      typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ;

      volatile type * const work_value = ((type*) m_exec.scratch_thread());

      *work_value = value ;

      memory_fence();

      if ( team_fan_in() ) {
        // The last thread to synchronize returns true, all other threads wait for team_fan_out()
        // m_team_base[0]                 == highest ranking team member
        // m_team_base[ m_team_size - 1 ] == lowest ranking team member
        //
        // 1) copy from lower to higher rank, initialize lowest rank to zero
        // 2) prefix sum from lowest to highest rank, skipping lowest rank

        type accum = 0 ;

        if ( global_accum ) {
          // Team total, exchanged atomically for the team's global base value.
          for ( int i = m_team_size ; i-- ; ) {
            type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
            accum += val ;
          }
          accum = atomic_fetch_add( global_accum , accum );
        }

        // Replace each member's value by the sum of the values before it.
        for ( int i = m_team_size ; i-- ; ) {
          type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
          const type offset = accum ;
          accum += val ;
          val = offset ;
        }

        memory_fence();
      }

      team_fan_out();

      return *work_value ;
    }
#endif

  /** \brief  Intra-team exclusive prefix sum with team_rank() ordering.
   *
   *  The highest rank thread can compute the reduction total as
   *    reduction_total = dev.team_scan( value ) + value ;
   */
  template< typename Type >
  KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const
    { return this-> template team_scan<Type>( value , 0 ); }

  //----------------------------------------
  // Private for the driver

private:

  typedef execution_space::scratch_memory_space space ;

public:

  /** \brief  Map the executing pool thread onto a team and onto the range of
   *          league iterations that team executes.
   *
   *  Threads whose reversed rank within their team allocation is not below
   *  team_size(), or whose team has no league iterations, keep the zeroed
   *  league range and are therefore never valid().
   */
  template< class Arg0 , class Arg1 >
  inline
  OpenMPexecTeamMember( Impl::OpenMPexec & exec
                      , const TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > & team
                      , const int shmem_size
                      )
    : m_exec( exec )
    , m_team_shared(0,0)
    , m_team_shmem( shmem_size )
    , m_team_base_rev(0)
    , m_team_rank_rev(0)
    , m_team_rank(0)
    , m_team_size( team.team_size() )
    , m_league_rank(0)
    , m_league_end(0)
    , m_league_size( team.league_size() )
    {
      const int pool_rank_rev        = m_exec.pool_rank_rev();
      const int pool_team_rank_rev   = pool_rank_rev % team.team_alloc();
      const int pool_league_rank_rev = pool_rank_rev / team.team_alloc();
      const int league_iter_end      = team.league_size() - pool_league_rank_rev * team.team_iter();

      if ( pool_team_rank_rev < m_team_size && 0 < league_iter_end ) {
        m_team_base_rev  = team.team_alloc() * pool_league_rank_rev ;
        m_team_rank_rev  = pool_team_rank_rev ;
        m_team_rank      = m_team_size - ( m_team_rank_rev + 1 );
        m_league_end     = league_iter_end ;
        m_league_rank    = league_iter_end > team.team_iter() ? league_iter_end - team.team_iter() : 0 ;
        // Team shared memory starts after the collective slot in the base thread's scratch.
        new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_shmem );
      }
    }

  /** \brief  True while this thread still has a league iteration to execute. */
  bool valid() const
    { return m_league_rank < m_league_end ; }

  /** \brief  Advance to the next league iteration; when one remains, the team
   *          synchronizes and the shared memory handle is re-created. */
  void next()
    {
      if ( ++m_league_rank < m_league_end ) {
        team_barrier();
        new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_shmem );
      }
    }

  static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; }
};
} // namespace Impl
/** \brief  Team execution policy for the OpenMP execution space.
 *
 *  Stores the league size and the (possibly reduced) team size together with
 *  two derived quantities used by the team member: the number of pool threads
 *  allotted to each team and the number of league iterations each team runs.
 */
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP >
{
public:

  //! Tag this class as a kokkos execution policy
  typedef TeamPolicy      execution_policy ;

  //! Execution space of this execution policy.
  typedef Kokkos::OpenMP  execution_space ;

  //! Work tag: Arg0 unless Arg0 is the execution space, in which case Arg1.
  typedef typename
    Impl::if_c< ! Impl::is_same< Kokkos::OpenMP , Arg0 >::value , Arg0 , Arg1 >::type
      work_tag ;

  //! Handle passed to the functor for each team member.
  typedef Impl::OpenMPexecTeamMember  member_type ;

  //----------------------------------------

  //! Largest supported team size: thread pool size at depth 1.
  template< class FunctorType >
  inline static
  int team_size_max( const FunctorType & )
    { return execution_space::thread_pool_size(1); }

  //! Recommended team size: thread pool size at depth 2.
  template< class FunctorType >
  inline static
  int team_size_recommended( const FunctorType & )
    { return execution_space::thread_pool_size(2); }

  //! Recommended team size; the second argument is ignored.
  template< class FunctorType >
  inline static
  int team_size_recommended( const FunctorType &, const int& )
    { return execution_space::thread_pool_size(2); }

  //----------------------------------------

private:

  int m_league_size ;
  int m_team_size ;
  int m_team_alloc ;
  int m_team_iter ;

  // Derive all members from the requested league and team sizes.
  inline void init( const int league_size_request
                  , const int team_size_request )
    {
      const int threads_total   = execution_space::thread_pool_size(0);
      const int team_size_limit = execution_space::thread_pool_size(1);
      const int grain           = execution_space::thread_pool_size(2);

      m_league_size = league_size_request ;

      // Clamp the requested team size to the limit.
      m_team_size = team_size_request ;
      if ( ! ( team_size_request < team_size_limit ) ) { m_team_size = team_size_limit ; }

      // Round the team size up to a multiple of 'grain'.
      const int grain_blocks     = ( m_team_size + grain - 1 ) / grain ;
      const int padded_team_size = grain * grain_blocks ;

      // Number of teams that fit into the pool at once.
      const int concurrent_teams = threads_total / padded_team_size ;

      // Constraint : threads_total = m_team_alloc * concurrent_teams
      m_team_alloc = threads_total / concurrent_teams ;

      // Maximum number of league iterations each team will take.
      m_team_iter = ( m_league_size + concurrent_teams - 1 ) / concurrent_teams ;
    }

public:

  inline int team_size() const { return m_team_size ; }
  inline int league_size() const { return m_league_size ; }

  /** \brief  Specify league size, request team size */
  TeamPolicy( execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1)
    {
      (void) vector_length_request;
      init( league_size_request , team_size_request );
    }

  /** \brief  Specify league size, request team size (no execution space instance) */
  TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 )
    {
      (void) vector_length_request;
      init( league_size_request , team_size_request );
    }

  inline int team_alloc() const { return m_team_alloc ; }
  inline int team_iter() const { return m_team_iter ; }
};
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
// Thread pool size at the given topology depth; forwards to the
// implementation's pool_size().
inline
int OpenMP::thread_pool_size( int depth )
{
  const int size_at_depth = Impl::OpenMPexec::pool_size( depth );
  return size_at_depth ;
}
// Pool rank recorded for the calling OpenMP thread, or -1 when not compiling
// for the host memory space.
KOKKOS_INLINE_FUNCTION
int OpenMP::thread_pool_rank()
{
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  return -1 ;
#else
  return Impl::OpenMPexec::m_map_rank[ omp_get_thread_num() ];
#endif
}
} // namespace Kokkos
namespace Kokkos {
// Build the loop boundaries for iterating 'count' items across the threads
// of the team that 'thread' belongs to.
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
  typedef Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember> boundaries_type ;
  return boundaries_type( thread , count );
}
// Build the loop boundaries for iterating the range described by 'begin' and
// 'end' across the threads of the team that 'thread' belongs to.
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& begin, const iType& end) {
  typedef Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember> boundaries_type ;
  return boundaries_type( thread , begin , end );
}
// Build the loop boundaries for iterating 'count' items over the vector
// lanes of the calling thread.
template<typename iType>
KOKKOS_INLINE_FUNCTION
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >
ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
  typedef Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember > boundaries_type ;
  return boundaries_type( thread , count );
}
// Wrap a team member handle in the tag type used to request "once per team"
// execution.
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember> PerTeam(const Impl::OpenMPexecTeamMember& thread) {
  typedef Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember> single_type ;
  return single_type( thread );
}
// Wrap a team member handle in the tag type used to request "once per thread"
// execution.
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> PerThread(const Impl::OpenMPexecTeamMember& thread) {
  typedef Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> single_type ;
  return single_type( thread );
}
} // namespace Kokkos
namespace Kokkos {
/** \brief  Inter-thread parallel_for. Executes lambda(iType i) for each index
 *          of the calling thread's share of the team range.
 *
 * The calling thread visits the indices from loop_boundaries.start up to
 * (excluding) loop_boundaries.end in steps of loop_boundaries.increment.
 * This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, const Lambda& lambda) {
  iType i = loop_boundaries.start ;
  while ( i < loop_boundaries.end ) {
    lambda(i);
    i += loop_boundaries.increment ;
  }
}
/** \brief  Inter-thread parallel_reduce with summation. Executes
 *          lambda(iType i, ValueType & val) for each index of the calling
 *          thread's share of the team range.
 *
 * Each thread sums its own contributions starting from ValueType(); the
 * per-thread sums are then combined across the team with team_reduce using
 * Impl::JoinAdd, and the team-wide result is stored in 'result'.
 * This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries,
                     const Lambda & lambda, ValueType& result) {

  result = ValueType();

  iType i = loop_boundaries.start ;
  while ( i < loop_boundaries.end ) {
    ValueType contribution = ValueType();
    lambda( i , contribution );
    result += contribution ;
    i += loop_boundaries.increment ;
  }

  // Combine the per-thread sums across the team.
  result = loop_boundaries.thread.team_reduce( result , Impl::JoinAdd<ValueType>() );
}
/** \brief  Inter-thread parallel_reduce with a user supplied join. Executes
 *          lambda(iType i, ValueType & val) for each index of the calling
 *          thread's share of the team range.
 *
 * Each thread starts from the incoming value of init_result and folds every
 * contribution in with join(ValueType& val, const ValueType& update); the
 * per-thread values are then combined across the team with team_reduce using
 * the same join, and the outcome is written back to init_result.
 * Because the incoming init_result seeds every thread's accumulator it should
 * be the neutral element of the join operation (e.g. '0 for +-' or '1 for *').
 * This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries,
                     const Lambda & lambda, const JoinType& join, ValueType& init_result) {

  ValueType accumulated = init_result ;

  iType i = loop_boundaries.start ;
  while ( i < loop_boundaries.end ) {
    ValueType contribution = ValueType();
    lambda( i , contribution );
    join( accumulated , contribution );
    i += loop_boundaries.increment ;
  }

  // Combine the per-thread values across the team.
  init_result = loop_boundaries.thread.team_reduce( accumulated , join );
}
} //namespace Kokkos
namespace Kokkos {
/** \brief  Intra-thread vector parallel_for. Executes lambda(iType i) for
 *          each index of the vector range.
 *
 * On this back end the range is traversed by a plain loop in the calling
 * thread, from loop_boundaries.start up to (excluding) loop_boundaries.end in
 * steps of loop_boundaries.increment; an 'ivdep' hint is emitted when the
 * compiler supports it. This functionality requires C++11 support.*/
template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
    loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType lane = loop_boundaries.start ;
        lane < loop_boundaries.end ;
        lane += loop_boundaries.increment ) {
    lambda( lane );
  }
}
/** \brief  Intra-thread vector parallel_reduce with summation. Executes
 *          lambda(iType i, ValueType & val) for each index of the vector range.
 *
 * 'result' is reset to ValueType() and every contribution is added to it with
 * operator+=. The loop runs in the calling thread only; no team-level
 * combination takes place. This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
      loop_boundaries, const Lambda & lambda, ValueType& result) {

  result = ValueType();

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType lane = loop_boundaries.start ;
        lane < loop_boundaries.end ;
        lane += loop_boundaries.increment ) {
    ValueType contribution = ValueType();
    lambda( lane , contribution );
    result += contribution ;
  }
}
/** \brief  Intra-thread vector parallel_reduce with a user supplied join.
 *          Executes lambda(iType i, ValueType & val) for each index of the
 *          vector range.
 *
 * The accumulator starts from the incoming value of init_result and every
 * contribution is folded in with join(ValueType& val, const ValueType& update);
 * the outcome is written back to init_result. The incoming init_result should
 * therefore be the neutral element of the join operation (e.g. '0 for +-' or
 * '1 for *'). The loop runs in the calling thread only.
 * This functionality requires C++11 support.*/
template< typename iType, class Lambda, typename ValueType, class JoinType >
KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
      loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {

  ValueType accumulated = init_result ;

#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for ( iType lane = loop_boundaries.start ;
        lane < loop_boundaries.end ;
        lane += loop_boundaries.increment ) {
    ValueType contribution = ValueType();
    lambda( lane , contribution );
    join( accumulated , contribution );
  }

  init_result = accumulated ;
}
/** \brief Intra-thread vector parallel exclusive prefix sum.
 *
 * Executes lambda(iType i, ValueType & val, bool final) for every index of the range
 * described by loop_boundaries. In this implementation the functor is called exactly once
 * per index, always with final==true, and val carries the running value accumulated by the
 * preceding calls (starting from a value-initialized ValueType). The functor must add the
 * contribution of index "i" to val regardless of the value of final; other execution
 * spaces might call it twice (final=false, then final=true).
 * The running value is a local of this function and is not handed back to the caller.
 * This functionality requires C++11 support. */
template< typename iType, class FunctorType >
KOKKOS_INLINE_FUNCTION
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
    loop_boundaries, const FunctorType & lambda) {
  // Value type of the scan as reported by the functor traits.
  typedef typename Kokkos::Impl::FunctorValueTraits< FunctorType , void >::value_type value_type ;
  value_type running_total = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#pragma ivdep
#endif
  for( iType idx = loop_boundaries.start ; idx < loop_boundaries.end ; idx += loop_boundaries.increment ) {
    lambda(idx,running_total,true);
  }
}
} // namespace Kokkos
namespace Kokkos {
/** \brief Per-vector 'single': the calling thread runs lambda() unconditionally.
 *
 * single_struct is accepted only for overload selection and is not inspected. */
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda)
{
  lambda();
}
/** \brief Per-team 'single': only the thread whose team_rank() is 0 runs lambda(). */
template<class FunctorType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda)
{
  const bool is_team_root = ( 0 == single_struct.team_member.team_rank() );
  if ( is_team_root ) {
    lambda();
  }
}
/** \brief Per-vector 'single' with a value: the calling thread runs lambda(val) unconditionally.
 *
 * single_struct is accepted only for overload selection and is not inspected. */
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val)
{
  lambda(val);
}
/** \brief Per-team 'single' with a value.
 *
 * The thread whose team_rank() is 0 runs lambda(val); afterwards every calling thread
 * invokes team_broadcast(val,0) on its team member handle. */
template<class FunctorType, class ValueType>
KOKKOS_INLINE_FUNCTION
void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val)
{
  const bool is_team_root = ( 0 == single_struct.team_member.team_rank() );
  if ( is_team_root ) {
    lambda(val);
  }
  // Executed by all callers, not only by the root.
  single_struct.team_member.team_broadcast(val,0);
}
}
#endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */

View File

@ -1,484 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_QTHREAD )
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <utility>
#include <Kokkos_Qthread.hpp>
#include <Kokkos_Atomic.hpp>
#include <impl/Kokkos_Error.hpp>
// Defines to enable experimental Qthread functionality
#define QTHREAD_LOCAL_PRIORITY
#define CLONED_TASKS
#include <qthread/qthread.h>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {

/** Capacity of the worker table below. */
enum { MAXIMUM_QTHREAD_WORKERS = 1024 };

/** Table of per-worker execution objects.
 *  Entries are stored by the reverse rank of the workers
 *  for faster fan-in / fan-out lookups:
 *    [ n - 1 , n - 2 , ... , 0 ]
 */
QthreadExec * s_exec[ MAXIMUM_QTHREAD_WORKERS ];

int s_number_shepherds            = 0 ;
int s_number_workers_per_shepherd = 0 ;
int s_number_workers              = 0 ;

/** Address of the s_exec slot that belongs to the calling worker.
 *  The worker's forward rank is ( shepherd * workers_per_shepherd + local worker );
 *  the slot is counted backwards from the end of the populated part of the table.
 */
inline
QthreadExec ** worker_exec()
{
  QthreadExec ** const table_end = s_exec + s_number_workers ;
  return table_end - ( qthread_shep() * s_number_workers_per_shepherd + qthread_worker_local(NULL) + 1 );
}

/** Size of a QthreadExec object rounded up by align_alloc; the scratch memory follows it. */
const int s_base_size = QthreadExec::align_alloc( sizeof(QthreadExec) );

int s_worker_reduce_end   = 0 ; /* End of worker reduction memory    */
int s_worker_shared_end   = 0 ; /* Total of worker scratch memory    */
int s_worker_shared_begin = 0 ; /* Beginning of worker shared memory */

/** Function and argument currently being dispatched by exec_all; both are 0 when idle. */
QthreadExecFunctionPointer volatile s_active_function = 0 ;
const void * volatile s_active_function_arg = 0 ;

} /* namespace */
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
/** Initialize the Qthread runtime for 'thread_count' workers.
 *
 *  Sets QTHREAD_HWPAR to thread_count, calls qthread_initialize(), and verifies that
 *  the runtime reports exactly thread_count workers, evenly distributed over the
 *  shepherds. On any mismatch the cached counts are reset, the runtime is finalized
 *  (if it had been initialized) and a runtime exception describing the observed
 *  configuration is thrown. On success the per-worker scratch memory is allocated
 *  with an initial ( 256 , 256 ) reduce / shared size and the host lock array is set up.
 */
void Qthread::initialize( int thread_count )
{
  // Environment variable: QTHREAD_NUM_SHEPHERDS
  // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP
  // Environment variable: QTHREAD_HWPAR
  {
    // setenv() copies both name and value. The previous putenv() call handed the
    // environment a pointer to this block-scoped buffer, which dangled as soon as
    // the block ended - i.e. before qthread_initialize() could read the variable.
    char value[64];
    snprintf(value,sizeof(value),"%d",thread_count);
    setenv("QTHREAD_HWPAR",value,1);
  }

  const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) &&
                       ( thread_count == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) &&
                       ( thread_count == qthread_num_workers() );

  bool ok_symmetry = true ;

  if ( ok_init ) {
    Impl::s_number_shepherds            = qthread_num_shepherds();
    Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD);
    Impl::s_number_workers              = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ;

    // Every shepherd must own the same number of workers.
    for ( int i = 0 ; ok_symmetry && i < Impl::s_number_shepherds ; ++i ) {
      ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) );
    }
  }

  if ( ! ok_init || ! ok_symmetry ) {
    std::ostringstream msg ;

    msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ;
    msg << " : qthread_num_shepherds = " << qthread_num_shepherds();
    msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD);
    msg << " : qthread_num_workers = " << qthread_num_workers();

    if ( ! ok_symmetry ) {
      msg << " : qthread_num_workers_local = {" ;
      for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) {
        msg << " " << qthread_num_workers_local(i) ;
      }
      msg << " }" ;
    }

    // Leave the cached configuration in the 'not initialized' state.
    Impl::s_number_workers              = 0 ;
    Impl::s_number_shepherds            = 0 ;
    Impl::s_number_workers_per_shepherd = 0 ;

    if ( ok_init ) { qthread_finalize(); }

    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  Impl::QthreadExec::resize_worker_scratch( 256 , 256 );

  // Initialize the lock array used for arbitrarily sized atomics
  Impl::init_lock_array_host_space();
}
/** Release all worker execution objects, finalize the Qthread runtime if workers
 *  had been configured, and reset the cached shepherd / worker counts to zero. */
void Qthread::finalize()
{
  // Worker memory is released first, while the worker count is still known.
  Impl::QthreadExec::clear_workers();

  const bool runtime_active = ( 0 != Impl::s_number_workers );
  if ( runtime_active ) {
    qthread_finalize();
  }

  Impl::s_number_workers              = 0 ;
  Impl::s_number_shepherds            = 0 ;
  Impl::s_number_workers_per_shepherd = 0 ;
}
/** Write the cached shepherd count and workers-per-shepherd count to 's' on one line.
 *  The 'detail' flag is currently not used. */
void Qthread::print_configuration( std::ostream & s , const bool detail )
{
  s << "Kokkos::Qthread {" ;
  s << " num_shepherds(" << Impl::s_number_shepherds << ")" ;
  s << " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd << ")" ;
  s << " }" << std::endl ;
}
/** Access the single function-local static Qthread object; the integer argument is ignored. */
Qthread & Qthread::instance( int )
{
  static Qthread singleton ;
  return singleton ;
}
/** No-op in this implementation: the body is intentionally empty. */
void Qthread::fence() {}
/** Number of shepherds cached by initialize(); zero when not initialized. */
int Qthread::shepherd_size() const
{
  return Impl::s_number_shepherds ;
}
/** Number of workers per shepherd cached by initialize(); zero when not initialized. */
int Qthread::shepherd_worker_size() const
{
  return Impl::s_number_workers_per_shepherd ;
}
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
namespace {
/** Qthread task body used by exec_all: looks up the calling worker's QthreadExec and
 *  runs the currently active function on it with the currently active argument.
 *  The 'arg' parameter is not used; the function always returns 0. */
aligned_t driver_exec_all( void * arg )
{
  QthreadExec & self = **worker_exec();

  const QthreadExecFunctionPointer active = s_active_function ;
  (*active)( self , s_active_function_arg );

/*
  fprintf( stdout
         , "QthreadExec driver worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n"
         , self.worker_rank()
         , self.worker_size()
         , self.shepherd_rank()
         , self.shepherd_size()
         , self.shepherd_worker_rank()
         , self.shepherd_worker_size()
         );
  fflush(stdout);
*/

  return 0 ;
}
/** Qthread task body used by QthreadExec::resize_worker_scratch.
 *
 *  Each caller allocates and constructs the QthreadExec for its own slot in s_exec
 *  (if that slot is still empty) and then waits until s_number_workers calls have
 *  reached the end of the function. The 'arg' parameter is not used; returns 0.
 *
 *  NOTE(review): the malloc result is not checked before the placement new.
 */
aligned_t driver_resize_worker_scratch( void * arg )
{
// Function-local statics shared by all callers: a spin lock and an arrival counter.
static volatile int lock_begin = 0 ;
static volatile int lock_end = 0 ;
// Slot of the calling worker in the reverse-ordered worker table.
QthreadExec ** const exec = worker_exec();
//----------------------------------------
// Serialize allocation for thread safety
while ( ! atomic_compare_exchange_strong( & lock_begin , 0 , 1 ) ); // Spin wait to claim lock
// 'ok' means this slot was still empty, i.e. this call is the one that fills it.
const bool ok = 0 == *exec ;
// Room for the object itself (s_base_size) followed by the worker's scratch memory.
if ( ok ) { *exec = (QthreadExec *) malloc( s_base_size + s_worker_shared_end ); }
lock_begin = 0 ; // release lock
// Construction happens outside the lock, in the memory just allocated.
if ( ok ) { new( *exec ) QthreadExec(); }
//----------------------------------------
// Wait for all calls to complete to ensure that each worker has executed.
// Every caller bumps the counter; the caller completing the count of s_number_workers
// resets it to zero, which releases the callers spinning below.
// (assumes atomic_fetch_add returns the value before the addition - TODO confirm)
if ( s_number_workers == 1 + atomic_fetch_add( & lock_end , 1 ) ) { lock_end = 0 ; }
while ( lock_end );
/*
fprintf( stdout
, "QthreadExec resize worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n"
, (**exec).worker_rank()
, (**exec).worker_size()
, (**exec).shepherd_rank()
, (**exec).shepherd_size()
, (**exec).shepherd_worker_rank()
, (**exec).shepherd_worker_size()
);
fflush(stdout);
*/
//----------------------------------------
// The slot was already occupied when this call ran: report it on stderr.
if ( ! ok ) {
fprintf( stderr , "Kokkos::QthreadExec resize failed\n" );
fflush( stderr );
}
return 0 ;
}
/** Throw a runtime exception unless the caller is shepherd 0 / local worker 0.
 *
 *  \param label       Prefix of the exception message.
 *  \param not_active  When true, additionally require that no function or argument is
 *                     currently registered for parallel dispatch.
 */
void verify_is_process( const char * const label , bool not_active = false )
{
  const bool off_main = ( 0 != qthread_shep() ) || ( 0 != qthread_worker_local(NULL) );
  const bool busy     = not_active && ( s_active_function || s_active_function_arg );

  // Nothing to report: called from the main process and (if requested) idle.
  if ( ! off_main && ! busy ) { return ; }

  std::string msg( label );
  msg.append( " : FAILED" );
  if ( off_main ) { msg.append(" : not called by main process"); }
  if ( busy )     { msg.append(" : parallel execution in progress"); }

  Kokkos::Impl::throw_runtime_exception( msg );
}
}
/** Number of workers per shepherd as cached in this translation unit. */
int QthreadExec::worker_per_shepherd()
{ return s_number_workers_per_shepherd ; }
/** Construct the execution object of the calling worker.
 *
 *  Ranks are taken from the Qthread runtime for the calling worker; sizes and the
 *  reduction-memory end come from the file-level configuration. The scratch memory is
 *  expected to start s_base_size bytes after the object itself. The worker starts in
 *  the Active state.
 */
QthreadExec::QthreadExec()
{
  const int shep  = qthread_shep();
  const int local = qthread_worker_local(NULL);

  // Shepherds are laid out in reverse order inside the worker table.
  const int reverse_shep = s_number_shepherds - ( shep + 1 );

  m_worker_base   = s_exec ;
  m_shepherd_base = s_exec + s_number_workers_per_shepherd * reverse_shep ;
  m_scratch_alloc = ( (unsigned char *) this ) + s_base_size ;
  m_reduce_end    = s_worker_reduce_end ;

  m_shepherd_rank        = shep ;
  m_shepherd_size        = s_number_shepherds ;
  m_shepherd_worker_rank = local ;
  m_shepherd_worker_size = s_number_workers_per_shepherd ;

  m_worker_rank  = shep * s_number_workers_per_shepherd + local ;
  m_worker_size  = s_number_workers ;
  m_worker_state = QthreadExec::Active ;
}
void QthreadExec::clear_workers()
{
for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) {
QthreadExec * const exec = s_exec[iwork] ;
s_exec[iwork] = 0 ;
free( exec );
}
}
/** Re-construct 'space' in place so that it spans the shared region
 *  [ s_worker_shared_begin , s_worker_shared_end ) of the scratch memory owned by the
 *  first entry of this worker's shepherd table. */
void QthreadExec::shared_reset( Qthread::scratch_memory_space & space )
{
  unsigned char * const shepherd_scratch = (unsigned char *) (*m_shepherd_base)->m_scratch_alloc ;

  new( & space )
    Qthread::scratch_memory_space( shepherd_scratch + s_worker_shared_begin
                                 , s_worker_shared_end - s_worker_shared_begin );
}
/** Ensure every worker owns scratch memory for at least 'reduce_size' bytes of reduction
 *  memory and 'shared_size' bytes of shared memory (both rounded up by align_alloc, with an
 *  aligned 8 byte scan area in between).
 *
 *  If the current sizes already suffice nothing happens. Otherwise all worker objects are
 *  released, the new layout is recorded, and driver_resize_worker_scratch is run once per
 *  worker (forked for the other workers, called directly for the caller). Throws a runtime
 *  exception listing the worker ranks whose slot is still empty afterwards.
 */
void QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size )
{
// Scratch layout per worker: [ reduce | scan | shared ]
const int exec_all_reduce_alloc = align_alloc( reduce_size );
const int shepherd_scan_alloc = align_alloc( 8 );
const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size );
// Reallocate only when the reduction area or the total would otherwise be too small.
if ( s_worker_reduce_end < exec_all_reduce_alloc ||
s_worker_shared_end < shepherd_shared_end ) {
/*
fprintf( stdout , "QthreadExec::resize\n");
fflush(stdout);
*/
// Clear current worker memory before allocating new worker memory
clear_workers();
// Increase the buffers to an aligned allocation
s_worker_reduce_end = exec_all_reduce_alloc ;
s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ;
s_worker_shared_end = shepherd_shared_end ;
// Need to query which shepherd this main 'process' is running...
const int main_shep = qthread_shep();
// Have each worker resize its memory for proper first-touch
#if 0
for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) {
qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep );
}}
#else
// If this function is used before the 'qthread.task_policy' unit test
// the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so.
for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
// One task per worker of the shepherd, minus one on the caller's own shepherd
// because the caller runs the driver itself below.
const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ;
if ( num_clone ) {
const int ret = qthread_fork_clones_to_local_priority
( driver_resize_worker_scratch /* function */
, NULL /* function data block */
, NULL /* pointer to return value feb */
, jshep /* shepherd number */
, num_clone - 1 /* number of instances - 1 */
);
// NOTE(review): 'ret' is only consumed by assert, so the result is unchecked when
// assertions are compiled out.
assert(ret == QTHREAD_SUCCESS);
}
}
#endif
// The calling worker takes part as well; this call also waits inside the driver
// until all s_number_workers calls have arrived.
driver_resize_worker_scratch( NULL );
// Verify all workers allocated
bool ok = true ;
for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; }
if ( ! ok ) {
std::ostringstream msg ;
msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ;
for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) {
// s_exec is in reverse rank order, so convert the index back to a worker rank.
if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); }
}
msg << " }" ;
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
}
}
/** Run 'func' with argument 'arg' once on every Qthread worker.
 *
 *  Must be called from the main process while no other dispatch is registered
 *  (verify_is_process throws otherwise). The function and argument are published through
 *  the file-level s_active_function / s_active_function_arg, driver_exec_all is forked for
 *  the other workers and called directly for the caller, and both variables are cleared
 *  once the caller's own call has returned.
 *
 *  NOTE(review): this function itself does not wait for the forked tasks; presumably the
 *  dispatched function synchronizes the workers (e.g. via exec_all_barrier) - confirm.
 */
void QthreadExec::exec_all( Qthread & , QthreadExecFunctionPointer func , const void * arg )
{
verify_is_process("QthreadExec::exec_all(...)",true);
/*
fprintf( stdout , "QthreadExec::exec_all\n");
fflush(stdout);
*/
// Publish the work for driver_exec_all.
s_active_function = func ;
s_active_function_arg = arg ;
// Need to query which shepherd this main 'process' is running...
const int main_shep = qthread_shep();
#if 0
for ( int jshep = 0 , iwork = 0 ; jshep < s_number_shepherds ; ++jshep ) {
for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i , ++iwork ) {
qthread_fork_to( driver_exec_all , NULL , NULL , jshep );
}}
#else
// If this function is used before the 'qthread.task_policy' unit test
// the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so.
for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
// One task per worker of the shepherd, minus one on the caller's own shepherd
// because the caller runs the driver itself below.
const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ;
if ( num_clone ) {
const int ret = qthread_fork_clones_to_local_priority
( driver_exec_all /* function */
, NULL /* function data block */
, NULL /* pointer to return value feb */
, jshep /* shepherd number */
, num_clone - 1 /* number of instances - 1 */
);
// NOTE(review): 'ret' is only consumed by assert, so the result is unchecked when
// assertions are compiled out.
assert(ret == QTHREAD_SUCCESS);
}
}
#endif
// The calling worker executes its own share of the work.
driver_exec_all( NULL );
// Mark the dispatch as finished so verify_is_process accepts the next one.
s_active_function = 0 ;
s_active_function_arg = 0 ;
}
void * QthreadExec::exec_all_reduce_result()
{
return s_exec[0]->m_scratch_alloc ;
}
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
namespace Impl {
/** Default member for the task-team driver: bound to the calling worker's execution
 *  object and describing a league of one team with a single member of rank 0.
 *  The team shared memory is reset through the execution object. */
QthreadTeamPolicyMember::QthreadTeamPolicyMember()
  : m_exec( **worker_exec() )
  , m_team_shared( 0 , 0 )
  , m_team_size( 1 )    // alternative: s_number_workers_per_shepherd
  , m_team_rank( 0 )    // alternative: m_exec.shepherd_worker_rank()
  , m_league_size( 1 )
  , m_league_end( 1 )
  , m_league_rank( 0 )
{
  m_exec.shared_reset( m_team_shared );
}
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */

View File

@ -1,614 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_QTHREADEXEC_HPP
#define KOKKOS_QTHREADEXEC_HPP
#include <impl/Kokkos_spinwait.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
//----------------------------------------------------------------------------
// Forward declaration so the function pointer type below can refer to the class.
class QthreadExec ;
/** Signature of a function dispatched to every worker: it receives the worker's
 *  QthreadExec and an opaque, read-only argument. */
typedef void (*QthreadExecFunctionPointer)( QthreadExec & , const void * );
/** Per-worker execution object of the Qthread back end.
 *
 *  Holds the ranks / sizes of the worker within its shepherd and within the whole pool,
 *  a pointer to its scratch memory, and a state flag used by the fan-in / fan-out
 *  synchronization routines below. Worker tables (m_worker_base, m_shepherd_base) are
 *  indexed by reverse rank.
 *
 *  NOTE(review): the synchronization routines rely on Impl::spinwait( flag , value );
 *  presumably it spins while 'flag' equals 'value' - confirm against Kokkos_spinwait.hpp.
 */
class QthreadExec {
private:

  enum { Inactive = 0 , Active = 1 };

  const QthreadExec * const * m_worker_base ;    ///< Table of all workers
  const QthreadExec * const * m_shepherd_base ;  ///< Table of this shepherd's workers

  void  * m_scratch_alloc ;  ///< Scratch memory [ reduce , team , shared ]
  int     m_reduce_end ;     ///< End of scratch reduction memory

  int m_shepherd_rank ;
  int m_shepherd_size ;

  int m_shepherd_worker_rank ;
  int m_shepherd_worker_size ;

  /*
   *  m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + m_shepherd_worker_rank
   *  m_worker_size = m_shepherd_size * m_shepherd_worker_size
   */
  int m_worker_rank ;
  int m_worker_size ;

  /** Synchronization flag; mutable so that const routines (also of other workers) may set it. */
  int mutable volatile m_worker_state ;

  friend class Kokkos::Qthread ;

  // Not destructible / copyable from outside.
  ~QthreadExec();
  QthreadExec( const QthreadExec & );
  QthreadExec & operator = ( const QthreadExec & );

public:

  QthreadExec();

  /** Execute the input function on all available Qthread workers */
  static void exec_all( Qthread & , QthreadExecFunctionPointer , const void * );

  //----------------------------------------
  /** Barrier across all workers participating in the 'exec_all' */
  void exec_all_barrier() const
    {
      const int rev_rank = m_worker_size - ( m_worker_rank + 1 );

      int n , j ;

      // Fan-in: wait on each partner at power-of-two distance.
      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active );
      }

      // Every worker but the root (rev_rank == 0) signals arrival and waits for release.
      if ( rev_rank ) {
        m_worker_state = QthreadExec::Inactive ;
        Impl::spinwait( m_worker_state , QthreadExec::Inactive );
      }

      // Fan-out: release the partners waited on above.
      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        m_worker_base[j]->m_worker_state = QthreadExec::Active ;
      }
    }

  /** Barrier across workers within the shepherd with rank < team_size */
  void shepherd_barrier( const int team_size ) const
    {
      if ( m_shepherd_worker_rank < team_size ) {

        const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );

        int n , j ;

        for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
          Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
        }

        if ( rev_rank ) {
          m_worker_state = QthreadExec::Inactive ;
          Impl::spinwait( m_worker_state , QthreadExec::Inactive );
        }

        for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
          m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
        }
      }
    }

  //----------------------------------------
  /** Reduce across all workers participating in the 'exec_all'.
   *  During fan-in each worker joins its partners' scratch values into its own. */
  template< class FunctorType , class ArgTag >
  inline
  void exec_all_reduce( const FunctorType & func ) const
    {
      typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;

      const int rev_rank = m_worker_size - ( m_worker_rank + 1 );

      int n , j ;

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        const QthreadExec & fan = *m_worker_base[j];

        Impl::spinwait( fan.m_worker_state , QthreadExec::Active );

        ValueJoin::join( func , m_scratch_alloc , fan.m_scratch_alloc );
      }

      if ( rev_rank ) {
        m_worker_state = QthreadExec::Inactive ;
        Impl::spinwait( m_worker_state , QthreadExec::Inactive );
      }

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        m_worker_base[j]->m_worker_state = QthreadExec::Active ;
      }
    }

  //----------------------------------------
  /** Scan across all workers participating in the 'exec_all' */
  template< class FunctorType , class ArgTag >
  inline
  void exec_all_scan( const FunctorType & func ) const
    {
      typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > ValueInit ;
      typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
      typedef Kokkos::Impl::FunctorValueOps<  FunctorType , ArgTag > ValueOps ;

      const int rev_rank = m_worker_size - ( m_worker_rank + 1 );

      int n , j ;

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active );
      }

      if ( rev_rank ) {
        m_worker_state = QthreadExec::Inactive ;
        Impl::spinwait( m_worker_state , QthreadExec::Inactive );
      }
      else {
        // Root thread scans across values before releasing threads
        // Worker data is in reverse order, so m_worker_base[0] is the
        // highest ranking thread.

        // Copy from lower ranking to higher ranking worker.
        for ( int i = 1 ; i < m_worker_size ; ++i ) {
          ValueOps::copy( func
                        , m_worker_base[i-1]->m_scratch_alloc
                        , m_worker_base[i]->m_scratch_alloc
                        );
        }

        ValueInit::init( func , m_worker_base[m_worker_size-1]->m_scratch_alloc );

        // Join from lower ranking to higher ranking worker.
        // Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2].
        // The '0 <' guard keeps the loop from running (with negative indices) when
        // there is only a single worker.
        for ( int i = m_worker_size - 1 ; 0 < --i ; ) {
          ValueJoin::join( func , m_worker_base[i-1]->m_scratch_alloc , m_worker_base[i]->m_scratch_alloc );
        }
      }

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
        m_worker_base[j]->m_worker_state = QthreadExec::Active ;
      }
    }

  //----------------------------------------

  /** Typed pointer to the team scratch value, located m_reduce_end bytes into the scratch memory. */
  template< class Type>
  inline
  volatile Type * shepherd_team_scratch_value() const
    { return (volatile Type*)(((unsigned char *) m_scratch_alloc) + m_reduce_end); }

  /** Broadcast 'value' of the worker with shepherd-local rank 'team_rank' to the team.
   *  The value is passed through the team scratch value of m_shepherd_base[0]. */
  template< class Type >
  inline
  void shepherd_broadcast( Type & value , const int team_size , const int team_rank ) const
    {
      if ( m_shepherd_base ) {
        // Keep the volatile qualifier of the scratch pointer; dropping it does not compile.
        volatile Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
        if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value ; }
        memory_fence();
        shepherd_barrier( team_size );
        value = *shared_value ;
      }
    }

  /** Sum 'value' over the team with operator+= ; every member returns the total. */
  template< class Type >
  inline
  Type shepherd_reduce( const int team_size , const Type & value ) const
    {
      *shepherd_team_scratch_value<Type>() = value ;

      memory_fence();

      const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );

      int n , j ;

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
        Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
      }

      if ( rev_rank ) {
        m_worker_state = QthreadExec::Inactive ;
        Impl::spinwait( m_worker_state , QthreadExec::Inactive );
      }
      else {
        // Root accumulates over the team members only. After the fan-in loop 'n' is the
        // first power of two >= team_size, so it must not be used as the loop bound:
        // for team sizes that are not a power of two it would reach past the team.
        volatile Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
        for ( int i = 1 ; i < team_size ; ++i ) {
          accum += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
        }
        for ( int i = 1 ; i < team_size ; ++i ) {
          * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ;
        }

        memory_fence();
      }

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
        m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
      }

      return *shepherd_team_scratch_value<Type>();
    }

  /** Reduce 'value' over the team with op.join ; every member returns the result. */
  template< class JoinOp >
  inline
  typename JoinOp::value_type
    shepherd_reduce( const int team_size
                   , const typename JoinOp::value_type & value
                   , const JoinOp & op ) const
    {
      typedef typename JoinOp::value_type Type ;

      *shepherd_team_scratch_value<Type>() = value ;

      memory_fence();

      const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );

      int n , j ;

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
        Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
      }

      if ( rev_rank ) {
        m_worker_state = QthreadExec::Inactive ;
        Impl::spinwait( m_worker_state , QthreadExec::Inactive );
      }
      else {
        volatile Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
        for ( int i = 1 ; i < team_size ; ++i ) {
          op.join( accum , * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() );
        }
        for ( int i = 1 ; i < team_size ; ++i ) {
          * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ;
        }

        memory_fence();
      }

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
        m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
      }

      return *shepherd_team_scratch_value<Type>();
    }

  /** Exclusive prefix sum of 'value' over the team.
   *  If 'global_value' is given, the team total is atomically added to it and the value
   *  it held before becomes the base of this team's scan; otherwise the base is 0. */
  template< class Type >
  inline
  Type shepherd_scan( const int team_size
                    , const Type & value
                    ,       Type * const global_value = 0 ) const
    {
      *shepherd_team_scratch_value<Type>() = value ;

      memory_fence();

      const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );

      int n , j ;

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
        Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
      }

      if ( rev_rank ) {
        m_worker_state = QthreadExec::Inactive ;
        Impl::spinwait( m_worker_state , QthreadExec::Inactive );
      }
      else {
        // Root thread scans across values before releasing threads
        // Worker data is in reverse order, so m_shepherd_base[0] is the
        // highest ranking thread.

        // Copy from lower ranking to higher ranking worker.

        Type accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
        for ( int i = 1 ; i < team_size ; ++i ) {
          const Type tmp = * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
          accum += tmp ;
          * m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() = tmp ;
        }

        * m_shepherd_base[team_size-1]->shepherd_team_scratch_value<Type>() =
          global_value ? atomic_fetch_add( global_value , accum ) : 0 ;

        // Join from lower ranking to higher ranking worker.
        for ( int i = team_size ; --i ; ) {
          * m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
        }

        memory_fence();
      }

      for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
        m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
      }

      return *shepherd_team_scratch_value<Type>();
    }

  //----------------------------------------

  /** Round 'size' up to the next multiple of 64. */
  static inline
  int align_alloc( int size )
    {
      enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64bytes */};
      enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 };
      return ( size + ALLOC_GRAIN_MASK ) & ~ALLOC_GRAIN_MASK ;
    }

  void shared_reset( Qthread::scratch_memory_space & );

  /** Start of this worker's scratch memory, where its reduction value lives. */
  void * exec_all_reduce_value() const { return m_scratch_alloc ; }

  static void * exec_all_reduce_result();

  static void resize_worker_scratch( const int reduce_size , const int shared_size );
  static void clear_workers();

  //----------------------------------------

  inline int worker_rank() const { return m_worker_rank ; }
  inline int worker_size() const { return m_worker_size ; }
  inline int shepherd_worker_rank() const { return m_shepherd_worker_rank ; }
  inline int shepherd_worker_size() const { return m_shepherd_worker_size ; }
  inline int shepherd_rank() const { return m_shepherd_rank ; }
  inline int shepherd_size() const { return m_shepherd_size ; }

  static int worker_per_shepherd();
};
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
class QthreadTeamPolicyMember {
private:
typedef Kokkos::Qthread execution_space ;
typedef execution_space::scratch_memory_space scratch_memory_space ;
Impl::QthreadExec & m_exec ;
scratch_memory_space m_team_shared ;
const int m_team_size ;
const int m_team_rank ;
const int m_league_size ;
const int m_league_end ;
int m_league_rank ;
public:
KOKKOS_INLINE_FUNCTION
const scratch_memory_space & team_shmem() const { return m_team_shared ; }
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }
KOKKOS_INLINE_FUNCTION void team_barrier() const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{}
#else
{ m_exec.shepherd_barrier( m_team_size ); }
#endif
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value , int rank ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{ return m_exec.template shepherd_broadcast<Type>( value , m_team_size , rank ); }
#endif
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{ return m_exec.template shepherd_reduce<Type>( m_team_size , value ); }
#endif
template< typename JoinOp >
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
team_reduce( const typename JoinOp::value_type & value
, const JoinOp & op ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return typename JoinOp::value_type(); }
#else
{ return m_exec.template shepherd_reduce<JoinOp>( m_team_size , value , op ); }
#endif
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
*
* The highest rank thread can compute the reduction total as
* reduction_total = dev.team_scan( value ) + value ;
*/
template< typename Type >
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
{ return Type(); }
#else
{ return m_exec.template shepherd_scan<Type>( m_team_size , value ); }
#endif
/** \brief Exclusive prefix sum across the team, ordered by team_rank(),
 *         combined with an inter-team accumulation whose ordering is
 *         non-deterministic.
 *
 *  At the end of the league's parallel execution the global inter-team
 *  accumulation value will be the scan's total.
 *  The league's teams execute in a non-deterministic order, so the base
 *  value each team's scan starts from is non-deterministic as well.
 *
 *  Host build: returns
 *    m_exec.shepherd_scan<Type>( team size , value , global_accum ).
 *  Non-host build: returns a value-initialized Type.
 */
template< typename Type >
KOKKOS_INLINE_FUNCTION
Type team_scan( const Type & value , Type * const global_accum ) const
{
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  return m_exec.template shepherd_scan<Type>( m_team_size , value , global_accum );
#else
  return Type();
#endif
}
//----------------------------------------
// Private driver for task-team parallel
// Default constructor: declared here only; its definition is not in this part of the file.
QthreadTeamPolicyMember();
//----------------------------------------
// Private for the driver, which iterates over this shepherd's teams as:
//   for ( member_type i(exec,team); i ; i.next_team() ) { ... }
// Initialize
//   exec : the QthreadExec this member forwards its team operations to
//   team : the policy supplying m_team_size, m_league_size and m_shepherd_iter
template< class Arg0 , class Arg1 >
QthreadTeamPolicyMember( Impl::QthreadExec & exec , const TeamPolicy<Arg0,Arg1,Qthread> & team )
: m_exec( exec )
, m_team_shared(0,0)
, m_team_size( team.m_team_size )
, m_team_rank( exec.shepherd_worker_rank() )
, m_league_size( team.m_league_size )
// One-past-the-last league rank for this shepherd: the league size minus
// m_shepherd_iter for every shepherd ranked after this one.
, m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) )
// First league rank for this shepherd: m_shepherd_iter below m_league_end, clamped at zero.
// NOTE(review): this reads m_league_end, so it relies on m_league_end being declared
// before m_league_rank in the class (members initialize in declaration order) -- the
// member declarations are outside this view; confirm.
, m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 )
{
// Same reset that next_team() performs before each subsequent team.
m_exec.shared_reset( m_team_shared );
}
// Continue: true while m_league_rank has not yet reached m_league_end,
// i.e. while the driver loop still has a team to run.
operator bool () const
{
  return m_league_rank < m_league_end ;
}
// Iterate: step to the next league rank, then call shared_reset on
// m_team_shared just as the initializing constructor does.
void next_team()
{
  ++m_league_rank ;
  m_exec.shared_reset( m_team_shared );
}
};
} // namespace Impl
/** \brief Team execution policy specialized for the Kokkos::Qthread execution space.
 *
 *  Holds the league size, the team size (capped at the shepherd worker
 *  count) and the number of league iterations assigned to each shepherd
 *  (league size divided by the shepherd count, rounded up).
 */
template< class Arg0 , class Arg1 >
class TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread >
{
private:

  const int m_league_size ;
  const int m_team_size ;
  const int m_shepherd_iter ;

public:

  //! Tag this class as a kokkos execution policy
  typedef TeamPolicy  execution_policy ;
  typedef Qthread     execution_space ;

  // Arg0 is the work tag unless Arg0 is Kokkos::Qthread itself, in which case Arg1 is.
  typedef typename
    Impl::if_c< ! Impl::is_same< Kokkos::Qthread , Arg0 >::value , Arg0 , Arg1 >::type
      work_tag ;

  //----------------------------------------

  /** \brief Largest team size: the shepherd worker count of Qthread::instance(). */
  template< class FunctorType >
  inline static
  int team_size_max( const FunctorType & )
    {
      return Qthread::instance().shepherd_worker_size();
    }

  /** \brief Recommended team size; identical to team_size_max. */
  template< class FunctorType >
  static
  int team_size_recommended( const FunctorType & f )
    {
      return team_size_max( f );
    }

  /** \brief Recommended team size; the second (unnamed) argument is ignored. */
  template< class FunctorType >
  inline static
  int team_size_recommended( const FunctorType & f , const int& )
    {
      return team_size_max( f );
    }

  //----------------------------------------

  inline int team_size() const
    { return m_team_size ; }

  inline int league_size() const
    { return m_league_size ; }

  /** \brief Construct from an explicit Qthread instance.
   *
   *  One active team per shepherd: the requested team size is capped at
   *  q.shepherd_worker_size(), and each shepherd is assigned
   *  league_size / q.shepherd_size() iterations, rounded up.
   */
  TeamPolicy( Kokkos::Qthread & q
            , const int league_size
            , const int team_size
            )
    : m_league_size( league_size )
    , m_team_size( q.shepherd_worker_size() > team_size
                 ? team_size
                 : q.shepherd_worker_size() )
    , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() )
    {}

  /** \brief Construct using Qthread::instance().
   *
   *  One active team per shepherd: same capping and rounding as the
   *  constructor taking an explicit Qthread instance.
   */
  TeamPolicy( const int league_size
            , const int team_size
            )
    : m_league_size( league_size )
    , m_team_size( Qthread::instance().shepherd_worker_size() > team_size
                 ? team_size
                 : Qthread::instance().shepherd_worker_size() )
    , m_shepherd_iter( ( league_size + Qthread::instance().shepherd_size() - 1 )
                       / Qthread::instance().shepherd_size() )
    {}

  typedef Impl::QthreadTeamPolicyMember  member_type ;

  // The member type reads the private sizes of this policy in its constructor.
  friend class Impl::QthreadTeamPolicyMember ;
};
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #define KOKKOS_QTHREADEXEC_HPP */

Some files were not shown because too many files have changed in this diff Show More