git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13583 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
40
lib/kokkos/Copyright.txt
Executable file
40
lib/kokkos/Copyright.txt
Executable file
@ -0,0 +1,40 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
40
lib/kokkos/LICENSE
Executable file
40
lib/kokkos/LICENSE
Executable file
@ -0,0 +1,40 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
318
lib/kokkos/Makefile.kokkos
Executable file
318
lib/kokkos/Makefile.kokkos
Executable file
@ -0,0 +1,318 @@
|
|||||||
|
# Default settings common options
|
||||||
|
|
||||||
|
KOKKOS_PATH=../../lib/kokkos
|
||||||
|
|
||||||
|
#Options: OpenMP,Serial,Pthreads,Cuda
|
||||||
|
KOKKOS_DEVICES ?= "OpenMP"
|
||||||
|
#KOKKOS_DEVICES ?= "Pthreads"
|
||||||
|
#Options: KNC,SNB,HSW,AMDAVX,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,ARMv8,BGQ,Power7,Power8
|
||||||
|
KOKKOS_ARCH ?= ""
|
||||||
|
#Options: yes,no
|
||||||
|
KOKKOS_DEBUG ?= "no"
|
||||||
|
#Options: hwloc,librt
|
||||||
|
KOKKOS_USE_TPLS ?= ""
|
||||||
|
|
||||||
|
#Default settings specific options
|
||||||
|
#Options: force_uvm,use_ldg,rdc
|
||||||
|
KOKKOS_CUDA_OPTIONS ?= ""
|
||||||
|
|
||||||
|
# Check for general settings
|
||||||
|
|
||||||
|
KOKKOS_CXX_STANDARD ?= "c++11"
|
||||||
|
|
||||||
|
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
|
||||||
|
KOKKOS_INTERNAL_ENABLE_PROFILING_COLLECT_KERNEL_DATA := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "kernel_times" | wc -l))
|
||||||
|
KOKKOS_INTERNAL_ENABLE_PROFILING_AGGREGATE_MPI := $(strip $(shell echo $(KOKKOS_PROFILING) | grep "aggregate_mpi" | wc -l))
|
||||||
|
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
|
||||||
|
|
||||||
|
# Check for external libraries
|
||||||
|
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
|
||||||
|
|
||||||
|
# Check for advanced settings
|
||||||
|
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
|
||||||
|
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
|
||||||
|
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
|
||||||
|
|
||||||
|
# Check for Kokkos Host Execution Spaces, one of which must be on
|
||||||
|
|
||||||
|
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
|
||||||
|
KOKKOS_INTERNAL_USE_SERIAL := 1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Number of lines in the compiler's --version banner that mention PGI.
# Wrapped in $(strip ...) like every other "wc -l" probe in this file so that
# padded wc output still compares equal to 1 in the ifeq tests below.
KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version | grep PGI | wc -l))
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||||
|
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
|
||||||
|
else
|
||||||
|
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||||
|
KOKKOS_INTERNAL_CXX11_FLAG := --c++11
|
||||||
|
else
|
||||||
|
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
|
||||||
|
endif
|
||||||
|
# Check for other Execution Spaces
|
||||||
|
|
||||||
|
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
|
||||||
|
|
||||||
|
# Check for Kokkos Architecture settings
|
||||||
|
|
||||||
|
#Intel based
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
|
||||||
|
|
||||||
|
#NVIDIA based
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||||
|
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||||
|
endif
|
||||||
|
|
||||||
|
#ARM based
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
|
||||||
|
|
||||||
|
#IBM based
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
|
||||||
|
|
||||||
|
#AMD based
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
||||||
|
|
||||||
|
#Any AVX?
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW) | bc ))
|
||||||
|
|
||||||
|
#Incompatible flags?
|
||||||
|
# 1 when more than one host architecture was requested, else 0.
# AMDAVX is already included in KOKKOS_INTERNAL_USE_ARCH_AVX (SNB+AMDAVX), so it
# is not added a second time here; counting it twice made KOKKOS_ARCH=AMDAVX
# alone evaluate to 2>1 and abort with the multiple-host-architectures error.
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||||
|
$(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||||
|
endif
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
|
||||||
|
$(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||||
|
endif
|
||||||
|
|
||||||
|
#Generating the list of Flags
|
||||||
|
|
||||||
|
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
|
||||||
|
# No warnings:
|
||||||
|
KOKKOS_CXXFLAGS =
|
||||||
|
# INTEL and CLANG warnings:
|
||||||
|
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
|
||||||
|
# GCC warnings:
|
||||||
|
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered
|
||||||
|
|
||||||
|
KOKKOS_LIBS = -lkokkos
|
||||||
|
KOKKOS_LDFLAGS = -L$(shell pwd)
|
||||||
|
KOKKOS_SRC =
|
||||||
|
KOKKOS_HEADERS =
|
||||||
|
|
||||||
|
#Generating the KokkosCore_config.h file
|
||||||
|
|
||||||
|
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
|
||||||
|
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
|
||||||
|
tmp := $(shell date >> KokkosCore_config.tmp)
|
||||||
|
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
|
||||||
|
|
||||||
|
|
||||||
|
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||||
|
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
||||||
|
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -G
|
||||||
|
endif
|
||||||
|
KOKKOS_CXXFLAGS += -g
|
||||||
|
KOKKOS_LDFLAGS += -g -ldl
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||||
|
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||||
|
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||||
|
KOKKOS_LIBS += -lhwloc
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
|
||||||
|
tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
|
||||||
|
tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
|
||||||
|
KOKKOS_LIBS += -lrt
|
||||||
|
endif
|
||||||
|
|
||||||
|
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||||
|
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
|
||||||
|
KOKKOS_CXXFLAGS += --relocatable-device-code=true
|
||||||
|
KOKKOS_LDFLAGS += --relocatable-device-code=true
|
||||||
|
endif
|
||||||
|
|
||||||
|
#Add Architecture flags
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -mavx
|
||||||
|
KOKKOS_LDFLAGS += -mavx
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -xcore-avx2
|
||||||
|
KOKKOS_LDFLAGS += -xcore-avx2
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -mmic
|
||||||
|
KOKKOS_LDFLAGS += -mmic
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -arch=sm_30
|
||||||
|
endif
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -arch=sm_32
|
||||||
|
endif
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -arch=sm_35
|
||||||
|
endif
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -arch=sm_37
|
||||||
|
endif
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -arch=sm_50
|
||||||
|
endif
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -arch=sm_52
|
||||||
|
endif
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -arch=sm_53
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Probe for an existing KokkosCore_config.h in the current directory.
# stderr is discarded so that a missing file (e.g. on the first build) yields an
# empty value instead of printing an "ls: ... No such file" error during parsing.
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>/dev/null)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
|
||||||
|
KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
|
||||||
|
else
|
||||||
|
KOKKOS_INTERNAL_NEW_CONFIG := 1
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
||||||
|
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
|
||||||
|
endif
|
||||||
|
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
|
||||||
|
|
||||||
|
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
|
||||||
|
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||||
|
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||||
|
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
||||||
|
KOKKOS_LIBS += -lcudart -lcuda
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||||
|
KOKKOS_LIBS += -lpthread
|
||||||
|
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||||
|
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
|
||||||
|
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||||
|
else
|
||||||
|
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||||
|
endif
|
||||||
|
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
# Setting up dependencies
|
||||||
|
|
||||||
|
KokkosCore_config.h:
|
||||||
|
|
||||||
|
KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)
|
||||||
|
|
||||||
|
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
|
||||||
|
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
|
||||||
|
|
||||||
|
include $(KOKKOS_PATH)/Makefile.targets
|
||||||
|
|
||||||
|
kokkos-clean:
|
||||||
|
rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
|
||||||
|
|
||||||
|
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
|
||||||
|
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
|
||||||
|
|
||||||
|
KOKKOS_LINK_DEPENDS=libkokkos.a
|
||||||
50
lib/kokkos/Makefile.targets
Executable file
50
lib/kokkos/Makefile.targets
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
|
||||||
|
Kokkos_AllocationTracker.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_AllocationTracker.cpp
|
||||||
|
Kokkos_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_BasicAllocators.cpp
|
||||||
|
Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
|
||||||
|
Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
|
||||||
|
Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
|
||||||
|
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
|
||||||
|
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
|
||||||
|
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
|
||||||
|
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
|
||||||
|
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
||||||
|
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||||
|
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||||
|
Kokkos_Cuda_BasicAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
|
||||||
|
Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
|
||||||
|
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||||
|
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
|
||||||
|
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||||
|
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||||
|
Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
|
||||||
|
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
|
||||||
|
endif
|
||||||
|
|
||||||
97
lib/kokkos/README
Executable file
97
lib/kokkos/README
Executable file
@ -0,0 +1,97 @@
|
|||||||
|
Kokkos implements a programming model in C++ for writing performance portable
|
||||||
|
applications targeting all major HPC platforms. For that purpose it provides
|
||||||
|
abstractions for both parallel execution of code and data management.
|
||||||
|
Kokkos is designed to target complex node architectures with N-level memory
|
||||||
|
hierarchies and multiple types of execution resources. It currently can use
|
||||||
|
OpenMP, Pthreads and CUDA as backend programming models.
|
||||||
|
|
||||||
|
The core developers of Kokkos are Carter Edwards and Christian Trott
|
||||||
|
at the Computer Science Research Institute of the Sandia National
|
||||||
|
Laboratories.
|
||||||
|
|
||||||
|
The KokkosP interface and associated tools are developed by the Application
|
||||||
|
Performance Team and Kokkos core developers at Sandia National Laboratories.
|
||||||
|
|
||||||
|
To learn more about Kokkos consider watching one of our presentations:
|
||||||
|
GTC 2015:
|
||||||
|
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
|
||||||
|
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
|
||||||
|
|
||||||
|
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
|
||||||
|
and feedback is greatly appreciated.
|
||||||
|
|
||||||
|
For questions please send an email to
|
||||||
|
kokkos-users@software.sandia.gov
|
||||||
|
|
||||||
|
For non-public questions send an email to
|
||||||
|
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
|
||||||
|
|
||||||
|
============================================================================
|
||||||
|
====Requirements============================================================
|
||||||
|
============================================================================
|
||||||
|
|
||||||
|
Primary tested compilers are:
|
||||||
|
GCC 4.7.2
|
||||||
|
GCC 5.1.0
|
||||||
|
Intel 14.0.1
|
||||||
|
Intel 15.0.1
|
||||||
|
Clang 3.7.0
|
||||||
|
|
||||||
|
Secondary tested compilers are:
|
||||||
|
CUDA 6.5
|
||||||
|
CUDA 7.0
|
||||||
|
|
||||||
|
Primary tested compilers are passing in release mode
|
||||||
|
with warnings as errors. We are using the following set
|
||||||
|
of flags:
|
||||||
|
GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
|
||||||
|
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
|
||||||
|
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
|
||||||
|
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
|
||||||
|
|
||||||
|
|
||||||
|
============================================================================
|
||||||
|
====Getting started=========================================================
|
||||||
|
============================================================================
|
||||||
|
|
||||||
|
In the 'example/tutorial' directory you will find step by step tutorial
|
||||||
|
examples which explain many of the features of Kokkos. They work with
|
||||||
|
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
|
||||||
|
in the 'example/tutorial' directory. This will build all examples in the
|
||||||
|
subfolders.
|
||||||
|
|
||||||
|
============================================================================
|
||||||
|
====Running Unit Tests======================================================
|
||||||
|
============================================================================
|
||||||
|
|
||||||
|
To run the unit tests create a build directory and run the following commands
|
||||||
|
|
||||||
|
KOKKOS_PATH/generate_makefile.bash
|
||||||
|
make build-test
|
||||||
|
make test
|
||||||
|
|
||||||
|
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
|
||||||
|
changing the device type for which to build.
|
||||||
|
|
||||||
|
============================================================================
|
||||||
|
====Install the library=====================================================
|
||||||
|
============================================================================
|
||||||
|
|
||||||
|
To install Kokkos as a library create a build directory and run the following commands
|
||||||
|
|
||||||
|
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
|
||||||
|
make lib
|
||||||
|
make install
|
||||||
|
|
||||||
|
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
|
||||||
|
changing the device type for which to build.
|
||||||
|
|
||||||
|
============================================================================
|
||||||
|
====CMakeFiles==============================================================
|
||||||
|
============================================================================
|
||||||
|
|
||||||
|
The CMake files contained in this repository require Tribits and are used
|
||||||
|
for integration with Trilinos. They do not currently support a standalone
|
||||||
|
CMake build.
|
||||||
|
|
||||||
|
|
||||||
0
lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp
Executable file
0
lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp
Executable file
1691
lib/kokkos/algorithms/src/Kokkos_Random.hpp
Executable file
1691
lib/kokkos/algorithms/src/Kokkos_Random.hpp
Executable file
File diff suppressed because it is too large
Load Diff
496
lib/kokkos/algorithms/src/Kokkos_Sort.hpp
Executable file
496
lib/kokkos/algorithms/src/Kokkos_Sort.hpp
Executable file
@ -0,0 +1,496 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef KOKKOS_SORT_HPP_
|
||||||
|
#define KOKKOS_SORT_HPP_
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
namespace SortImpl {
|
||||||
|
|
||||||
|
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
|
||||||
|
struct CopyOp;
|
||||||
|
|
||||||
|
template<class ValuesViewType>
|
||||||
|
struct CopyOp<ValuesViewType,1> {
|
||||||
|
template<class DstType, class SrcType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void copy(DstType& dst, size_t i_dst,
|
||||||
|
SrcType& src, size_t i_src ) {
|
||||||
|
dst(i_dst) = src(i_src);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ValuesViewType>
|
||||||
|
struct CopyOp<ValuesViewType,2> {
|
||||||
|
template<class DstType, class SrcType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void copy(DstType& dst, size_t i_dst,
|
||||||
|
SrcType& src, size_t i_src ) {
|
||||||
|
for(int j = 0;j< (int) dst.dimension_1(); j++)
|
||||||
|
dst(i_dst,j) = src(i_src,j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ValuesViewType>
|
||||||
|
struct CopyOp<ValuesViewType,3> {
|
||||||
|
template<class DstType, class SrcType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void copy(DstType& dst, size_t i_dst,
|
||||||
|
SrcType& src, size_t i_src ) {
|
||||||
|
for(int j = 0; j<dst.dimension_1(); j++)
|
||||||
|
for(int k = 0; k<dst.dimension_2(); k++)
|
||||||
|
dst(i_dst,j,k) = src(i_src,j,k);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space,
|
||||||
|
class SizeType = typename KeyViewType::memory_space::size_type>
|
||||||
|
class BinSort {
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
template<class ValuesViewType, class PermuteViewType, class CopyOp>
|
||||||
|
struct bin_sort_sort_functor {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef typename ValuesViewType::non_const_type values_view_type;
|
||||||
|
typedef typename ValuesViewType::const_type const_values_view_type;
|
||||||
|
Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout,
|
||||||
|
typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values;
|
||||||
|
values_view_type sorted_values;
|
||||||
|
typename PermuteViewType::const_type sort_order;
|
||||||
|
bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_):
|
||||||
|
values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int& i) const {
|
||||||
|
//printf("Sort: %i %i\n",i,sort_order(i));
|
||||||
|
CopyOp::copy(sorted_values,i,values,sort_order(i));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef BinSortOp bin_op_type;
|
||||||
|
|
||||||
|
struct bin_count_tag {};
|
||||||
|
struct bin_offset_tag {};
|
||||||
|
struct bin_binning_tag {};
|
||||||
|
struct bin_sort_bins_tag {};
|
||||||
|
|
||||||
|
public:
|
||||||
|
typedef SizeType size_type;
|
||||||
|
typedef size_type value_type;
|
||||||
|
|
||||||
|
typedef Kokkos::View<size_type*, execution_space> offset_type;
|
||||||
|
typedef Kokkos::View<const int*, execution_space> bin_count_type;
|
||||||
|
|
||||||
|
|
||||||
|
typedef Kokkos::View<typename KeyViewType::const_data_type,
|
||||||
|
typename KeyViewType::array_layout,
|
||||||
|
typename KeyViewType::memory_space> const_key_view_type;
|
||||||
|
typedef Kokkos::View<typename KeyViewType::const_data_type,
|
||||||
|
typename KeyViewType::array_layout,
|
||||||
|
typename KeyViewType::memory_space,
|
||||||
|
Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type;
|
||||||
|
|
||||||
|
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
|
||||||
|
typedef typename KeyViewType::const_value_type const_key_scalar;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const_key_view_type keys;
|
||||||
|
const_rnd_key_view_type keys_rnd;
|
||||||
|
|
||||||
|
public:
|
||||||
|
BinSortOp bin_op;
|
||||||
|
|
||||||
|
offset_type bin_offsets;
|
||||||
|
|
||||||
|
Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic;
|
||||||
|
bin_count_type bin_count_const;
|
||||||
|
|
||||||
|
offset_type sort_order;
|
||||||
|
|
||||||
|
bool sort_within_bins;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
// Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
|
||||||
|
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
|
||||||
|
bool sort_within_bins_ = false)
|
||||||
|
:keys(keys_),keys_rnd(keys_), bin_op(bin_op_) {
|
||||||
|
|
||||||
|
bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
|
||||||
|
bin_count_const = bin_count_atomic;
|
||||||
|
bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
|
||||||
|
sort_order = offset_type("PermutationVector",keys.dimension_0());
|
||||||
|
sort_within_bins = sort_within_bins_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
|
||||||
|
void create_permute_vector() {
|
||||||
|
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this);
|
||||||
|
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
|
||||||
|
|
||||||
|
Kokkos::deep_copy(bin_count_atomic,0);
|
||||||
|
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this);
|
||||||
|
|
||||||
|
if(sort_within_bins)
|
||||||
|
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort a view with respect ot the first dimension using the permutation array
|
||||||
|
template<class ValuesViewType>
|
||||||
|
void sort(ValuesViewType values) {
|
||||||
|
ValuesViewType sorted_values = ValuesViewType("Copy",
|
||||||
|
values.dimension_0(),
|
||||||
|
values.dimension_1(),
|
||||||
|
values.dimension_2(),
|
||||||
|
values.dimension_3(),
|
||||||
|
values.dimension_4(),
|
||||||
|
values.dimension_5(),
|
||||||
|
values.dimension_6(),
|
||||||
|
values.dimension_7());
|
||||||
|
|
||||||
|
parallel_for(values.dimension_0(),
|
||||||
|
bin_sort_sort_functor<ValuesViewType, offset_type,
|
||||||
|
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
|
||||||
|
|
||||||
|
deep_copy(values,sorted_values);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the permutation vector
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
offset_type get_permute_vector() const { return sort_order;}
|
||||||
|
|
||||||
|
// Get the start offsets for each bin
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
offset_type get_bin_offsets() const { return bin_offsets;}
|
||||||
|
|
||||||
|
// Get the count for each bin
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
bin_count_type get_bin_count() const {return bin_count_const;}
|
||||||
|
|
||||||
|
public:
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const bin_count_tag& tag, const int& i) const {
|
||||||
|
bin_count_atomic(bin_op.bin(keys,i))++;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const {
|
||||||
|
if(final) {
|
||||||
|
bin_offsets(i) = offset;
|
||||||
|
}
|
||||||
|
offset+=bin_count_const(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const bin_binning_tag& tag, const int& i) const {
|
||||||
|
const int bin = bin_op.bin(keys,i);
|
||||||
|
const int count = bin_count_atomic(bin)++;
|
||||||
|
|
||||||
|
sort_order(bin_offsets(bin) + count) = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const bin_sort_bins_tag& tag, const int&i ) const {
|
||||||
|
bool sorted = false;
|
||||||
|
int upper_bound = bin_offsets(i)+bin_count_const(i);
|
||||||
|
while(!sorted) {
|
||||||
|
sorted = true;
|
||||||
|
int old_idx = sort_order(bin_offsets(i));
|
||||||
|
int new_idx;
|
||||||
|
for(int k=bin_offsets(i)+1; k<upper_bound; k++) {
|
||||||
|
new_idx = sort_order(k);
|
||||||
|
|
||||||
|
if(!bin_op(keys_rnd,old_idx,new_idx)) {
|
||||||
|
sort_order(k-1) = new_idx;
|
||||||
|
sort_order(k) = old_idx;
|
||||||
|
sorted = false;
|
||||||
|
} else {
|
||||||
|
old_idx = new_idx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
upper_bound--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace SortImpl {
|
||||||
|
|
||||||
|
template<class KeyViewType>
|
||||||
|
struct DefaultBinOp1D {
|
||||||
|
const int max_bins_;
|
||||||
|
const double mul_;
|
||||||
|
typename KeyViewType::const_value_type range_;
|
||||||
|
typename KeyViewType::const_value_type min_;
|
||||||
|
|
||||||
|
//Construct BinOp with number of bins, minimum value and maxuimum value
|
||||||
|
DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
||||||
|
typename KeyViewType::const_value_type max )
|
||||||
|
:max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {}
|
||||||
|
|
||||||
|
//Determine bin index from key value
|
||||||
|
template<class ViewType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
int bin(ViewType& keys, const int& i) const {
|
||||||
|
return int(mul_*(keys(i)-min_));
|
||||||
|
}
|
||||||
|
|
||||||
|
//Return maximum bin index + 1
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
int max_bins() const {
|
||||||
|
return max_bins_;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Compare to keys within a bin if true new_val will be put before old_val
|
||||||
|
template<class ViewType, typename iType1, typename iType2>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
bool operator()(ViewType& keys, iType1& i1, iType2& i2) const {
|
||||||
|
return keys(i1)<keys(i2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class KeyViewType>
|
||||||
|
struct DefaultBinOp3D {
|
||||||
|
int max_bins_[3];
|
||||||
|
double mul_[3];
|
||||||
|
typename KeyViewType::non_const_value_type range_[3];
|
||||||
|
typename KeyViewType::non_const_value_type min_[3];
|
||||||
|
|
||||||
|
DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
|
||||||
|
typename KeyViewType::const_value_type max[] )
|
||||||
|
{
|
||||||
|
max_bins_[0] = max_bins__[0]+1;
|
||||||
|
max_bins_[1] = max_bins__[1]+1;
|
||||||
|
max_bins_[2] = max_bins__[2]+1;
|
||||||
|
mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]);
|
||||||
|
mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]);
|
||||||
|
mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]);
|
||||||
|
range_[0] = max[0]-min[0];
|
||||||
|
range_[1] = max[1]-min[1];
|
||||||
|
range_[2] = max[2]-min[2];
|
||||||
|
min_[0] = min[0];
|
||||||
|
min_[1] = min[1];
|
||||||
|
min_[2] = min[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class ViewType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
int bin(ViewType& keys, const int& i) const {
|
||||||
|
return int( (((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) +
|
||||||
|
int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) +
|
||||||
|
int(mul_[2]*(keys(i,2)-min_[2])));
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
int max_bins() const {
|
||||||
|
return max_bins_[0]*max_bins_[1]*max_bins_[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class ViewType, typename iType1, typename iType2>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const {
|
||||||
|
if (keys(i1,0)>keys(i2,0)) return true;
|
||||||
|
else if (keys(i1,0)==keys(i2,0)) {
|
||||||
|
if (keys(i1,1)>keys(i2,1)) return true;
|
||||||
|
else if (keys(i1,1)==keys(i2,2)) {
|
||||||
|
if (keys(i1,2)>keys(i2,2)) return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
struct min_max {
|
||||||
|
Scalar min;
|
||||||
|
Scalar max;
|
||||||
|
bool init;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
min_max() {
|
||||||
|
min = 0;
|
||||||
|
max = 0;
|
||||||
|
init = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
min_max (const min_max& val) {
|
||||||
|
min = val.min;
|
||||||
|
max = val.max;
|
||||||
|
init = val.init;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
min_max operator = (const min_max& val) {
|
||||||
|
min = val.min;
|
||||||
|
max = val.max;
|
||||||
|
init = val.init;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator+= (const Scalar& val) {
|
||||||
|
if(init) {
|
||||||
|
min = min<val?min:val;
|
||||||
|
max = max>val?max:val;
|
||||||
|
} else {
|
||||||
|
min = val;
|
||||||
|
max = val;
|
||||||
|
init = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator+= (const min_max& val) {
|
||||||
|
if(init && val.init) {
|
||||||
|
min = min<val.min?min:val.min;
|
||||||
|
max = max>val.max?max:val.max;
|
||||||
|
} else {
|
||||||
|
if(val.init) {
|
||||||
|
min = val.min;
|
||||||
|
max = val.max;
|
||||||
|
init = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator+= (volatile const Scalar& val) volatile {
|
||||||
|
if(init) {
|
||||||
|
min = min<val?min:val;
|
||||||
|
max = max>val?max:val;
|
||||||
|
} else {
|
||||||
|
min = val;
|
||||||
|
max = val;
|
||||||
|
init = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator+= (volatile const min_max& val) volatile {
|
||||||
|
if(init && val.init) {
|
||||||
|
min = min<val.min?min:val.min;
|
||||||
|
max = max>val.max?max:val.max;
|
||||||
|
} else {
|
||||||
|
if(val.init) {
|
||||||
|
min = val.min;
|
||||||
|
max = val.max;
|
||||||
|
init = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<class ViewType>
|
||||||
|
struct min_max_functor {
|
||||||
|
typedef typename ViewType::execution_space execution_space;
|
||||||
|
ViewType view;
|
||||||
|
typedef min_max<typename ViewType::non_const_value_type> value_type;
|
||||||
|
min_max_functor (const ViewType view_):view(view_) {
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(const size_t& i, value_type& val) const {
|
||||||
|
val += view(i);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType>
|
||||||
|
bool try_std_sort(ViewType view) {
|
||||||
|
bool possible = true;
|
||||||
|
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||||
|
size_t stride[8];
|
||||||
|
view.stride(stride);
|
||||||
|
#else
|
||||||
|
size_t stride[8] = { view.stride_0()
|
||||||
|
, view.stride_1()
|
||||||
|
, view.stride_2()
|
||||||
|
, view.stride_3()
|
||||||
|
, view.stride_4()
|
||||||
|
, view.stride_5()
|
||||||
|
, view.stride_6()
|
||||||
|
, view.stride_7()
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
|
||||||
|
possible = possible && (ViewType::Rank == 1);
|
||||||
|
possible = possible && (stride[0] == 1);
|
||||||
|
if(possible) {
|
||||||
|
std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0());
|
||||||
|
}
|
||||||
|
return possible;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class ViewType>
|
||||||
|
void sort(ViewType view, bool always_use_kokkos_sort = false) {
|
||||||
|
if(!always_use_kokkos_sort) {
|
||||||
|
if(SortImpl::try_std_sort(view)) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
|
||||||
|
SortImpl::min_max<typename ViewType::non_const_value_type> val;
|
||||||
|
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
|
||||||
|
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
|
||||||
|
bin_sort.create_permute_vector();
|
||||||
|
bin_sort.sort(view);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*template<class ViewType, class Comparator>
|
||||||
|
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
|
||||||
|
|
||||||
|
}*/
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
92
lib/kokkos/algorithms/unit_tests/Makefile
Executable file
92
lib/kokkos/algorithms/unit_tests/Makefile
Executable file
@ -0,0 +1,92 @@
|
|||||||
|
# Builds and runs the Kokkos algorithms unit tests, one executable per
# enabled backend (Cuda, Threads, OpenMP, Serial).

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests

default: build_all
	echo "End Build"


include $(KOKKOS_PATH)/Makefile.kokkos

# Compiler and linker defaults; Cuda builds go through nvcc_wrapper
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = nvcc_wrapper
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests

TEST_TARGETS =
TARGETS =

# One executable and one test target per enabled backend
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_Cuda
  TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_Threads
  TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_OpenMP
  TEST_TARGETS += test-openmp
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosAlgorithms_UnitTest_Serial
  TEST_TARGETS += test-serial
endif

# Link rules

KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda

KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads

KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP

KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial

# Run rules

test-cuda: KokkosAlgorithms_UnitTest_Cuda
	./KokkosAlgorithms_UnitTest_Cuda

test-threads: KokkosAlgorithms_UnitTest_Threads
	./KokkosAlgorithms_UnitTest_Threads

test-openmp: KokkosAlgorithms_UnitTest_OpenMP
	./KokkosAlgorithms_UnitTest_OpenMP

test-serial: KokkosAlgorithms_UnitTest_Serial
	./KokkosAlgorithms_UnitTest_Serial

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# These targets do not produce files of the same name; declaring them phony
# keeps a stray file called e.g. "test" or "clean" from disabling them.
.PHONY: default build_all test clean test-cuda test-threads test-openmp test-serial

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
||||||
|
|
||||||
110
lib/kokkos/algorithms/unit_tests/TestCuda.cpp
Executable file
110
lib/kokkos/algorithms/unit_tests/TestCuda.cpp
Executable file
@ -0,0 +1,110 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <TestRandom.hpp>
|
||||||
|
#include <TestSort.hpp>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
class cuda : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
Kokkos::HostSpace::execution_space::initialize();
|
||||||
|
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||||
|
}
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::Cuda::finalize();
|
||||||
|
Kokkos::HostSpace::execution_space::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
void cuda_test_random_xorshift64( int num_draws )
|
||||||
|
{
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_random_xorshift1024( int num_draws )
|
||||||
|
{
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define CUDA_RANDOM_XORSHIFT64( num_draws ) \
|
||||||
|
TEST_F( cuda, Random_XorShift64 ) { \
|
||||||
|
cuda_test_random_xorshift64(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \
|
||||||
|
TEST_F( cuda, Random_XorShift1024 ) { \
|
||||||
|
cuda_test_random_xorshift1024(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_SORT_UNSIGNED( size ) \
|
||||||
|
TEST_F( cuda, SortUnsigned ) { \
|
||||||
|
Impl::test_sort< Kokkos::Cuda, unsigned >(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CUDA_RANDOM_XORSHIFT64( 132141141 )
|
||||||
|
CUDA_RANDOM_XORSHIFT1024( 52428813 )
|
||||||
|
CUDA_SORT_UNSIGNED(171)
|
||||||
|
|
||||||
|
#undef CUDA_RANDOM_XORSHIFT64
|
||||||
|
#undef CUDA_RANDOM_XORSHIFT1024
|
||||||
|
#undef CUDA_SORT_UNSIGNED
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
||||||
|
|
||||||
102
lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp
Executable file
102
lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp
Executable file
@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
#include <TestRandom.hpp>
|
||||||
|
#include <TestSort.hpp>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_OPENMP
|
||||||
|
class openmp : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
|
||||||
|
unsigned threads_count = omp_get_max_threads();
|
||||||
|
|
||||||
|
if ( Kokkos::hwloc::available() ) {
|
||||||
|
threads_count = Kokkos::hwloc::get_available_numa_count() *
|
||||||
|
Kokkos::hwloc::get_available_cores_per_numa();
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::OpenMP::initialize( threads_count );
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::OpenMP::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \
|
||||||
|
TEST_F( openmp, Random_XorShift64 ) { \
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \
|
||||||
|
TEST_F( openmp, Random_XorShift1024 ) { \
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_SORT_UNSIGNED( size ) \
|
||||||
|
TEST_F( openmp, SortUnsigned ) { \
|
||||||
|
Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENMP_RANDOM_XORSHIFT64( 10240000 )
|
||||||
|
OPENMP_RANDOM_XORSHIFT1024( 10130144 )
|
||||||
|
OPENMP_SORT_UNSIGNED(171)
|
||||||
|
|
||||||
|
#undef OPENMP_RANDOM_XORSHIFT64
|
||||||
|
#undef OPENMP_RANDOM_XORSHIFT1024
|
||||||
|
#undef OPENMP_SORT_UNSIGNED
|
||||||
|
#endif
|
||||||
|
} // namespace test
|
||||||
|
|
||||||
476
lib/kokkos/algorithms/unit_tests/TestRandom.hpp
Executable file
476
lib/kokkos/algorithms/unit_tests/TestRandom.hpp
Executable file
@ -0,0 +1,476 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_DUALVIEW_HPP
|
||||||
|
#define KOKKOS_TEST_DUALVIEW_HPP
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_Random.hpp>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl{
|
||||||
|
|
||||||
|
// This test runs the random number generators and uses some statistic tests to
|
||||||
|
// check the 'goodness' of the random numbers:
|
||||||
|
// (i) mean: the mean is expected to be 0.5*Kokkos::rand<Generator,Scalar>::max()
|
||||||
|
// (ii) variance: the variance is 1/3*mean*mean
|
||||||
|
// (iii) covariance: the covariance is 0
|
||||||
|
// (iv) 1-tuple distribution: the mean, variance and covariance of a 1D histogram of random numbers
|
||||||
|
// (v) 3-tuple distribution: the mean, variance and covariance of a 3D histogram of random numbers
|
||||||
|
|
||||||
|
#define HIST_DIM3D 24
|
||||||
|
#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D)
|
||||||
|
|
||||||
|
struct RandomProperties {
|
||||||
|
uint64_t count;
|
||||||
|
double mean;
|
||||||
|
double variance;
|
||||||
|
double covariance;
|
||||||
|
double min;
|
||||||
|
double max;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
RandomProperties() {
|
||||||
|
count = 0;
|
||||||
|
mean = 0.0;
|
||||||
|
variance = 0.0;
|
||||||
|
covariance = 0.0;
|
||||||
|
min = 1e64;
|
||||||
|
max = -1e64;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
RandomProperties& operator+=(const RandomProperties& add) {
|
||||||
|
count += add.count;
|
||||||
|
mean += add.mean;
|
||||||
|
variance += add.variance;
|
||||||
|
covariance += add.covariance;
|
||||||
|
min = add.min<min?add.min:min;
|
||||||
|
max = add.max>max?add.max:max;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator+=(const volatile RandomProperties& add) volatile {
|
||||||
|
count += add.count;
|
||||||
|
mean += add.mean;
|
||||||
|
variance += add.variance;
|
||||||
|
covariance += add.covariance;
|
||||||
|
min = add.min<min?add.min:min;
|
||||||
|
max = add.max>max?add.max:max;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class GeneratorPool, class Scalar>
|
||||||
|
struct test_random_functor {
|
||||||
|
typedef typename GeneratorPool::generator_type rnd_type;
|
||||||
|
|
||||||
|
typedef RandomProperties value_type;
|
||||||
|
typedef typename GeneratorPool::device_type device_type;
|
||||||
|
|
||||||
|
GeneratorPool rand_pool;
|
||||||
|
const double mean;
|
||||||
|
|
||||||
|
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
||||||
|
// an exclusive upper bound on the range of random numbers that
|
||||||
|
// draw() can generate. However, for the float specialization, some
|
||||||
|
// implementations might violate this upper bound, due to rounding
|
||||||
|
// error. Just in case, we leave an extra space at the end of each
|
||||||
|
// dimension, in the View types below.
|
||||||
|
typedef Kokkos::View<int[HIST_DIM1D+1],typename GeneratorPool::device_type> type_1d;
|
||||||
|
type_1d density_1d;
|
||||||
|
typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1],typename GeneratorPool::device_type> type_3d;
|
||||||
|
type_3d density_3d;
|
||||||
|
|
||||||
|
test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) :
|
||||||
|
rand_pool (rand_pool_),
|
||||||
|
mean (0.5*Kokkos::rand<rnd_type,Scalar>::max ()),
|
||||||
|
density_1d (d1d),
|
||||||
|
density_3d (d3d)
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (int i, RandomProperties& prop) const {
|
||||||
|
using Kokkos::atomic_fetch_add;
|
||||||
|
|
||||||
|
rnd_type rand_gen = rand_pool.get_state();
|
||||||
|
for (int k = 0; k < 1024; ++k) {
|
||||||
|
const Scalar tmp = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
|
||||||
|
prop.count++;
|
||||||
|
prop.mean += tmp;
|
||||||
|
prop.variance += (tmp-mean)*(tmp-mean);
|
||||||
|
const Scalar tmp2 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
|
||||||
|
prop.count++;
|
||||||
|
prop.mean += tmp2;
|
||||||
|
prop.variance += (tmp2-mean)*(tmp2-mean);
|
||||||
|
prop.covariance += (tmp-mean)*(tmp2-mean);
|
||||||
|
const Scalar tmp3 = Kokkos::rand<rnd_type,Scalar>::draw(rand_gen);
|
||||||
|
prop.count++;
|
||||||
|
prop.mean += tmp3;
|
||||||
|
prop.variance += (tmp3-mean)*(tmp3-mean);
|
||||||
|
prop.covariance += (tmp2-mean)*(tmp3-mean);
|
||||||
|
|
||||||
|
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to
|
||||||
|
// define an exclusive upper bound on the range of random
|
||||||
|
// numbers that draw() can generate. However, for the float
|
||||||
|
// specialization, some implementations might violate this upper
|
||||||
|
// bound, due to rounding error. Just in case, we have left an
|
||||||
|
// extra space at the end of each dimension of density_1d and
|
||||||
|
// density_3d.
|
||||||
|
//
|
||||||
|
// Please note that those extra entries might not get counted in
|
||||||
|
// the histograms. However, if Kokkos::rand is broken and only
|
||||||
|
// returns values of max(), the histograms will still catch this
|
||||||
|
// indirectly, since none of the other values will be filled in.
|
||||||
|
|
||||||
|
const Scalar theMax = Kokkos::rand<rnd_type, Scalar>::max ();
|
||||||
|
|
||||||
|
const uint64_t ind1_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp / theMax);
|
||||||
|
const uint64_t ind2_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp2 / theMax);
|
||||||
|
const uint64_t ind3_1d = static_cast<uint64_t> (1.0 * HIST_DIM1D * tmp3 / theMax);
|
||||||
|
|
||||||
|
const uint64_t ind1_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp / theMax);
|
||||||
|
const uint64_t ind2_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp2 / theMax);
|
||||||
|
const uint64_t ind3_3d = static_cast<uint64_t> (1.0 * HIST_DIM3D * tmp3 / theMax);
|
||||||
|
|
||||||
|
atomic_fetch_add (&density_1d(ind1_1d), 1);
|
||||||
|
atomic_fetch_add (&density_1d(ind2_1d), 1);
|
||||||
|
atomic_fetch_add (&density_1d(ind3_1d), 1);
|
||||||
|
atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1);
|
||||||
|
}
|
||||||
|
rand_pool.free_state(rand_gen);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
struct test_histogram1d_functor {
|
||||||
|
typedef RandomProperties value_type;
|
||||||
|
typedef typename DeviceType::execution_space execution_space;
|
||||||
|
typedef typename DeviceType::memory_space memory_space;
|
||||||
|
|
||||||
|
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
||||||
|
// an exclusive upper bound on the range of random numbers that
|
||||||
|
// draw() can generate. However, for the float specialization, some
|
||||||
|
// implementations might violate this upper bound, due to rounding
|
||||||
|
// error. Just in case, we leave an extra space at the end of each
|
||||||
|
// dimension, in the View type below.
|
||||||
|
typedef Kokkos::View<int[HIST_DIM1D+1], memory_space> type_1d;
|
||||||
|
type_1d density_1d;
|
||||||
|
double mean;
|
||||||
|
|
||||||
|
test_histogram1d_functor (type_1d d1d, int num_draws) :
|
||||||
|
density_1d (d1d),
|
||||||
|
mean (1.0*num_draws/HIST_DIM1D*3)
|
||||||
|
{
|
||||||
|
printf ("Mean: %e\n", mean);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION void
|
||||||
|
operator() (const typename memory_space::size_type i,
|
||||||
|
RandomProperties& prop) const
|
||||||
|
{
|
||||||
|
typedef typename memory_space::size_type size_type;
|
||||||
|
const double count = density_1d(i);
|
||||||
|
prop.mean += count;
|
||||||
|
prop.variance += 1.0 * (count - mean) * (count - mean);
|
||||||
|
//prop.covariance += 1.0*count*count;
|
||||||
|
prop.min = count < prop.min ? count : prop.min;
|
||||||
|
prop.max = count > prop.max ? count : prop.max;
|
||||||
|
if (i < static_cast<size_type> (HIST_DIM1D-1)) {
|
||||||
|
prop.covariance += (count - mean) * (density_1d(i+1) - mean);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
struct test_histogram3d_functor {
|
||||||
|
typedef RandomProperties value_type;
|
||||||
|
typedef typename DeviceType::execution_space execution_space;
|
||||||
|
typedef typename DeviceType::memory_space memory_space;
|
||||||
|
|
||||||
|
// NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
|
||||||
|
// an exclusive upper bound on the range of random numbers that
|
||||||
|
// draw() can generate. However, for the float specialization, some
|
||||||
|
// implementations might violate this upper bound, due to rounding
|
||||||
|
// error. Just in case, we leave an extra space at the end of each
|
||||||
|
// dimension, in the View type below.
|
||||||
|
typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1], memory_space> type_3d;
|
||||||
|
type_3d density_3d;
|
||||||
|
double mean;
|
||||||
|
|
||||||
|
test_histogram3d_functor (type_3d d3d, int num_draws) :
|
||||||
|
density_3d (d3d),
|
||||||
|
mean (1.0*num_draws/HIST_DIM1D)
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION void
|
||||||
|
operator() (const typename memory_space::size_type i,
|
||||||
|
RandomProperties& prop) const
|
||||||
|
{
|
||||||
|
typedef typename memory_space::size_type size_type;
|
||||||
|
const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D),
|
||||||
|
(i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
|
||||||
|
i % HIST_DIM3D);
|
||||||
|
prop.mean += count;
|
||||||
|
prop.variance += (count - mean) * (count - mean);
|
||||||
|
if (i < static_cast<size_type> (HIST_DIM1D-1)) {
|
||||||
|
const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D),
|
||||||
|
((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
|
||||||
|
(i+1)%HIST_DIM3D);
|
||||||
|
prop.covariance += (count - mean) * (count_next - mean);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Templated test that uses the above functors.
|
||||||
|
//
|
||||||
|
template <class RandomGenerator,class Scalar>
|
||||||
|
struct test_random_scalar {
|
||||||
|
typedef typename RandomGenerator::generator_type rnd_type;
|
||||||
|
|
||||||
|
int pass_mean,pass_var,pass_covar;
|
||||||
|
int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar;
|
||||||
|
int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar;
|
||||||
|
|
||||||
|
test_random_scalar (typename test_random_functor<RandomGenerator,int>::type_1d& density_1d,
|
||||||
|
typename test_random_functor<RandomGenerator,int>::type_3d& density_3d,
|
||||||
|
RandomGenerator& pool,
|
||||||
|
unsigned int num_draws)
|
||||||
|
{
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
using Kokkos::parallel_reduce;
|
||||||
|
|
||||||
|
{
|
||||||
|
cerr << " -- Testing randomness properties" << endl;
|
||||||
|
|
||||||
|
RandomProperties result;
|
||||||
|
typedef test_random_functor<RandomGenerator, Scalar> functor_type;
|
||||||
|
parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result);
|
||||||
|
|
||||||
|
//printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2);
|
||||||
|
double tolerance = 2.0*sqrt(1.0/num_draws);
|
||||||
|
double mean_expect = 0.5*Kokkos::rand<rnd_type,Scalar>::max();
|
||||||
|
double variance_expect = 1.0/3.0*mean_expect*mean_expect;
|
||||||
|
double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0;
|
||||||
|
double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0;
|
||||||
|
double covariance_eps = result.covariance/num_draws/2/variance_expect;
|
||||||
|
pass_mean = ((-tolerance < mean_eps) &&
|
||||||
|
( tolerance > mean_eps)) ? 1:0;
|
||||||
|
pass_var = ((-tolerance < variance_eps) &&
|
||||||
|
( tolerance > variance_eps)) ? 1:0;
|
||||||
|
pass_covar = ((-1.4*tolerance < covariance_eps) &&
|
||||||
|
( 1.4*tolerance > covariance_eps)) ? 1:0;
|
||||||
|
cerr << "Pass: " << pass_mean
|
||||||
|
<< " " << pass_var
|
||||||
|
<< " " << mean_eps
|
||||||
|
<< " " << variance_eps
|
||||||
|
<< " " << covariance_eps
|
||||||
|
<< " || " << tolerance << endl;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
cerr << " -- Testing 1-D histogram" << endl;
|
||||||
|
|
||||||
|
RandomProperties result;
|
||||||
|
typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type;
|
||||||
|
parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result);
|
||||||
|
|
||||||
|
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
|
||||||
|
double mean_expect = 1.0*num_draws*3/HIST_DIM1D;
|
||||||
|
double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
|
||||||
|
double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D;
|
||||||
|
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
|
||||||
|
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
|
||||||
|
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
|
||||||
|
pass_hist1d_mean = ((-tolerance < mean_eps) &&
|
||||||
|
( tolerance > mean_eps)) ? 1:0;
|
||||||
|
pass_hist1d_var = ((-tolerance < variance_eps) &&
|
||||||
|
( tolerance > variance_eps)) ? 1:0;
|
||||||
|
pass_hist1d_covar = ((-tolerance < covariance_eps) &&
|
||||||
|
( tolerance > covariance_eps)) ? 1:0;
|
||||||
|
|
||||||
|
cerr << "Density 1D: " << mean_eps
|
||||||
|
<< " " << variance_eps
|
||||||
|
<< " " << (result.covariance/HIST_DIM1D/HIST_DIM1D)
|
||||||
|
<< " || " << tolerance
|
||||||
|
<< " " << result.min
|
||||||
|
<< " " << result.max
|
||||||
|
<< " || " << result.variance/HIST_DIM1D
|
||||||
|
<< " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D)
|
||||||
|
<< " || " << result.covariance/HIST_DIM1D
|
||||||
|
<< " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D
|
||||||
|
<< endl;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
cerr << " -- Testing 3-D histogram" << endl;
|
||||||
|
|
||||||
|
RandomProperties result;
|
||||||
|
typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type;
|
||||||
|
parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result);
|
||||||
|
|
||||||
|
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
|
||||||
|
double mean_expect = 1.0*num_draws/HIST_DIM1D;
|
||||||
|
double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
|
||||||
|
double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D;
|
||||||
|
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
|
||||||
|
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
|
||||||
|
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
|
||||||
|
pass_hist3d_mean = ((-tolerance < mean_eps) &&
|
||||||
|
( tolerance > mean_eps)) ? 1:0;
|
||||||
|
pass_hist3d_var = ((-tolerance < variance_eps) &&
|
||||||
|
( tolerance > variance_eps)) ? 1:0;
|
||||||
|
pass_hist3d_covar = ((-tolerance < covariance_eps) &&
|
||||||
|
( tolerance > covariance_eps)) ? 1:0;
|
||||||
|
|
||||||
|
cerr << "Density 3D: " << mean_eps
|
||||||
|
<< " " << variance_eps
|
||||||
|
<< " " << result.covariance/HIST_DIM1D/HIST_DIM1D
|
||||||
|
<< " || " << tolerance
|
||||||
|
<< " " << result.min
|
||||||
|
<< " " << result.max << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class RandomGenerator>
|
||||||
|
void test_random(unsigned int num_draws)
|
||||||
|
{
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
typename test_random_functor<RandomGenerator,int>::type_1d density_1d("D1d");
|
||||||
|
typename test_random_functor<RandomGenerator,int>::type_3d density_3d("D3d");
|
||||||
|
|
||||||
|
cerr << "Test Scalar=int" << endl;
|
||||||
|
RandomGenerator pool(31891);
|
||||||
|
test_random_scalar<RandomGenerator,int> test_int(density_1d,density_3d,pool,num_draws);
|
||||||
|
ASSERT_EQ( test_int.pass_mean,1);
|
||||||
|
ASSERT_EQ( test_int.pass_var,1);
|
||||||
|
ASSERT_EQ( test_int.pass_covar,1);
|
||||||
|
ASSERT_EQ( test_int.pass_hist1d_mean,1);
|
||||||
|
ASSERT_EQ( test_int.pass_hist1d_var,1);
|
||||||
|
ASSERT_EQ( test_int.pass_hist1d_covar,1);
|
||||||
|
ASSERT_EQ( test_int.pass_hist3d_mean,1);
|
||||||
|
ASSERT_EQ( test_int.pass_hist3d_var,1);
|
||||||
|
ASSERT_EQ( test_int.pass_hist3d_covar,1);
|
||||||
|
deep_copy(density_1d,0);
|
||||||
|
deep_copy(density_3d,0);
|
||||||
|
|
||||||
|
cerr << "Test Scalar=unsigned int" << endl;
|
||||||
|
test_random_scalar<RandomGenerator,unsigned int> test_uint(density_1d,density_3d,pool,num_draws);
|
||||||
|
ASSERT_EQ( test_uint.pass_mean,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_var,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_covar,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_hist1d_mean,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_hist1d_var,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_hist1d_covar,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_hist3d_mean,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_hist3d_var,1);
|
||||||
|
ASSERT_EQ( test_uint.pass_hist3d_covar,1);
|
||||||
|
deep_copy(density_1d,0);
|
||||||
|
deep_copy(density_3d,0);
|
||||||
|
|
||||||
|
cerr << "Test Scalar=int64_t" << endl;
|
||||||
|
test_random_scalar<RandomGenerator,int64_t> test_int64(density_1d,density_3d,pool,num_draws);
|
||||||
|
ASSERT_EQ( test_int64.pass_mean,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_var,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_covar,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_hist1d_mean,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_hist1d_var,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_hist1d_covar,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_hist3d_mean,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_hist3d_var,1);
|
||||||
|
ASSERT_EQ( test_int64.pass_hist3d_covar,1);
|
||||||
|
deep_copy(density_1d,0);
|
||||||
|
deep_copy(density_3d,0);
|
||||||
|
|
||||||
|
cerr << "Test Scalar=uint64_t" << endl;
|
||||||
|
test_random_scalar<RandomGenerator,uint64_t> test_uint64(density_1d,density_3d,pool,num_draws);
|
||||||
|
ASSERT_EQ( test_uint64.pass_mean,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_var,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_covar,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_hist1d_mean,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_hist1d_var,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_hist1d_covar,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_hist3d_mean,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_hist3d_var,1);
|
||||||
|
ASSERT_EQ( test_uint64.pass_hist3d_covar,1);
|
||||||
|
deep_copy(density_1d,0);
|
||||||
|
deep_copy(density_3d,0);
|
||||||
|
|
||||||
|
cerr << "Test Scalar=float" << endl;
|
||||||
|
test_random_scalar<RandomGenerator,float> test_float(density_1d,density_3d,pool,num_draws);
|
||||||
|
ASSERT_EQ( test_float.pass_mean,1);
|
||||||
|
ASSERT_EQ( test_float.pass_var,1);
|
||||||
|
ASSERT_EQ( test_float.pass_covar,1);
|
||||||
|
ASSERT_EQ( test_float.pass_hist1d_mean,1);
|
||||||
|
ASSERT_EQ( test_float.pass_hist1d_var,1);
|
||||||
|
ASSERT_EQ( test_float.pass_hist1d_covar,1);
|
||||||
|
ASSERT_EQ( test_float.pass_hist3d_mean,1);
|
||||||
|
ASSERT_EQ( test_float.pass_hist3d_var,1);
|
||||||
|
ASSERT_EQ( test_float.pass_hist3d_covar,1);
|
||||||
|
deep_copy(density_1d,0);
|
||||||
|
deep_copy(density_3d,0);
|
||||||
|
|
||||||
|
cerr << "Test Scalar=double" << endl;
|
||||||
|
test_random_scalar<RandomGenerator,double> test_double(density_1d,density_3d,pool,num_draws);
|
||||||
|
ASSERT_EQ( test_double.pass_mean,1);
|
||||||
|
ASSERT_EQ( test_double.pass_var,1);
|
||||||
|
ASSERT_EQ( test_double.pass_covar,1);
|
||||||
|
ASSERT_EQ( test_double.pass_hist1d_mean,1);
|
||||||
|
ASSERT_EQ( test_double.pass_hist1d_var,1);
|
||||||
|
ASSERT_EQ( test_double.pass_hist1d_covar,1);
|
||||||
|
ASSERT_EQ( test_double.pass_hist3d_mean,1);
|
||||||
|
ASSERT_EQ( test_double.pass_hist3d_var,1);
|
||||||
|
ASSERT_EQ( test_double.pass_hist3d_covar,1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif // KOKKOS_TEST_DUALVIEW_HPP
|
||||||
99
lib/kokkos/algorithms/unit_tests/TestSerial.cpp
Executable file
99
lib/kokkos/algorithms/unit_tests/TestSerial.cpp
Executable file
@ -0,0 +1,99 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#include <TestRandom.hpp>
|
||||||
|
#include <TestSort.hpp>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_SERIAL
|
||||||
|
class serial : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision (5) << std::scientific;
|
||||||
|
Kokkos::Serial::initialize ();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase ()
|
||||||
|
{
|
||||||
|
Kokkos::Serial::finalize ();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \
|
||||||
|
TEST_F( serial, Random_XorShift64 ) { \
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Serial> >(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \
|
||||||
|
TEST_F( serial, Random_XorShift1024 ) { \
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Serial> >(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SERIAL_SORT_UNSIGNED( size ) \
|
||||||
|
TEST_F( serial, SortUnsigned ) { \
|
||||||
|
Impl::test_sort< Kokkos::Serial, unsigned >(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
SERIAL_RANDOM_XORSHIFT64( 10240000 )
|
||||||
|
SERIAL_RANDOM_XORSHIFT1024( 10130144 )
|
||||||
|
SERIAL_SORT_UNSIGNED(171)
|
||||||
|
|
||||||
|
#undef SERIAL_RANDOM_XORSHIFT64
|
||||||
|
#undef SERIAL_RANDOM_XORSHIFT1024
|
||||||
|
#undef SERIAL_SORT_UNSIGNED
|
||||||
|
|
||||||
|
#endif // KOKKOS_HAVE_SERIAL
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
|
||||||
206
lib/kokkos/algorithms/unit_tests/TestSort.hpp
Executable file
206
lib/kokkos/algorithms/unit_tests/TestSort.hpp
Executable file
@ -0,0 +1,206 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef TESTSORT_HPP_
|
||||||
|
#define TESTSORT_HPP_
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include<Kokkos_Core.hpp>
|
||||||
|
#include<Kokkos_Random.hpp>
|
||||||
|
#include<Kokkos_Sort.hpp>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl{
|
||||||
|
|
||||||
|
template<class ExecutionSpace, class Scalar>
|
||||||
|
struct is_sorted_struct {
|
||||||
|
typedef unsigned int value_type;
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
|
||||||
|
Kokkos::View<Scalar*,ExecutionSpace> keys;
|
||||||
|
|
||||||
|
is_sorted_struct(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {}
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (int i, unsigned int& count) const {
|
||||||
|
if(keys(i)>keys(i+1)) count++;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ExecutionSpace, class Scalar>
|
||||||
|
struct sum {
|
||||||
|
typedef double value_type;
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
|
||||||
|
Kokkos::View<Scalar*,ExecutionSpace> keys;
|
||||||
|
|
||||||
|
sum(Kokkos::View<Scalar*,ExecutionSpace> keys_):keys(keys_) {}
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (int i, double& count) const {
|
||||||
|
count+=keys(i);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ExecutionSpace, class Scalar>
|
||||||
|
struct bin3d_is_sorted_struct {
|
||||||
|
typedef unsigned int value_type;
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
|
||||||
|
Kokkos::View<Scalar*[3],ExecutionSpace> keys;
|
||||||
|
|
||||||
|
int max_bins;
|
||||||
|
Scalar min;
|
||||||
|
Scalar max;
|
||||||
|
|
||||||
|
bin3d_is_sorted_struct(Kokkos::View<Scalar*[3],ExecutionSpace> keys_,int max_bins_,Scalar min_,Scalar max_):
|
||||||
|
keys(keys_),max_bins(max_bins_),min(min_),max(max_) {
|
||||||
|
}
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (int i, unsigned int& count) const {
|
||||||
|
int ix1 = int ((keys(i,0)-min)/max * max_bins);
|
||||||
|
int iy1 = int ((keys(i,1)-min)/max * max_bins);
|
||||||
|
int iz1 = int ((keys(i,2)-min)/max * max_bins);
|
||||||
|
int ix2 = int ((keys(i+1,0)-min)/max * max_bins);
|
||||||
|
int iy2 = int ((keys(i+1,1)-min)/max * max_bins);
|
||||||
|
int iz2 = int ((keys(i+1,2)-min)/max * max_bins);
|
||||||
|
|
||||||
|
if (ix1>ix2) count++;
|
||||||
|
else if(ix1==ix2) {
|
||||||
|
if (iy1>iy2) count++;
|
||||||
|
else if ((iy1==iy2) && (iz1>iz2)) count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ExecutionSpace, class Scalar>
|
||||||
|
struct sum3D {
|
||||||
|
typedef double value_type;
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
|
||||||
|
Kokkos::View<Scalar*[3],ExecutionSpace> keys;
|
||||||
|
|
||||||
|
sum3D(Kokkos::View<Scalar*[3],ExecutionSpace> keys_):keys(keys_) {}
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (int i, double& count) const {
|
||||||
|
count+=keys(i,0);
|
||||||
|
count+=keys(i,1);
|
||||||
|
count+=keys(i,2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ExecutionSpace, typename KeyType>
|
||||||
|
void test_1D_sort(unsigned int n,bool force_kokkos) {
|
||||||
|
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
|
||||||
|
KeyViewType keys("Keys",n);
|
||||||
|
|
||||||
|
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
||||||
|
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
|
||||||
|
|
||||||
|
double sum_before = 0.0;
|
||||||
|
double sum_after = 0.0;
|
||||||
|
unsigned int sort_fails = 0;
|
||||||
|
|
||||||
|
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_before);
|
||||||
|
|
||||||
|
Kokkos::sort(keys,force_kokkos);
|
||||||
|
|
||||||
|
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_after);
|
||||||
|
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys),sort_fails);
|
||||||
|
|
||||||
|
double ratio = sum_before/sum_after;
|
||||||
|
double epsilon = 1e-10;
|
||||||
|
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
|
||||||
|
|
||||||
|
ASSERT_EQ(sort_fails,0);
|
||||||
|
ASSERT_EQ(equal_sum,1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class ExecutionSpace, typename KeyType>
|
||||||
|
void test_3D_sort(unsigned int n) {
|
||||||
|
typedef Kokkos::View<KeyType*[3],ExecutionSpace > KeyViewType;
|
||||||
|
|
||||||
|
KeyViewType keys("Keys",n*n*n);
|
||||||
|
|
||||||
|
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
||||||
|
Kokkos::fill_random(keys,g,100.0);
|
||||||
|
|
||||||
|
double sum_before = 0.0;
|
||||||
|
double sum_after = 0.0;
|
||||||
|
unsigned int sort_fails = 0;
|
||||||
|
|
||||||
|
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
|
||||||
|
|
||||||
|
int bin_1d = 1;
|
||||||
|
while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2;
|
||||||
|
int bin_max[3] = {bin_1d,bin_1d,bin_1d};
|
||||||
|
typename KeyViewType::value_type min[3] = {0,0,0};
|
||||||
|
typename KeyViewType::value_type max[3] = {100,100,100};
|
||||||
|
|
||||||
|
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
|
||||||
|
BinOp bin_op(bin_max,min,max);
|
||||||
|
Kokkos::BinSort< KeyViewType , BinOp >
|
||||||
|
Sorter(keys,bin_op,false);
|
||||||
|
Sorter.create_permute_vector();
|
||||||
|
Sorter.template sort< KeyViewType >(keys);
|
||||||
|
|
||||||
|
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
|
||||||
|
Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
|
||||||
|
|
||||||
|
double ratio = sum_before/sum_after;
|
||||||
|
double epsilon = 1e-10;
|
||||||
|
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
|
||||||
|
|
||||||
|
printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails);
|
||||||
|
ASSERT_EQ(sort_fails,0);
|
||||||
|
ASSERT_EQ(equal_sum,1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class ExecutionSpace, typename KeyType>
|
||||||
|
void test_sort(unsigned int N)
|
||||||
|
{
|
||||||
|
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, true);
|
||||||
|
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, false);
|
||||||
|
test_3D_sort<ExecutionSpace,KeyType>(N);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* TESTSORT_HPP_ */
|
||||||
113
lib/kokkos/algorithms/unit_tests/TestThreads.cpp
Executable file
113
lib/kokkos/algorithms/unit_tests/TestThreads.cpp
Executable file
@ -0,0 +1,113 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#include <TestRandom.hpp>
|
||||||
|
#include <TestSort.hpp>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_PTHREAD
|
||||||
|
class threads : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
|
||||||
|
unsigned num_threads = 4;
|
||||||
|
|
||||||
|
if (Kokkos::hwloc::available()) {
|
||||||
|
num_threads = Kokkos::hwloc::get_available_numa_count()
|
||||||
|
* Kokkos::hwloc::get_available_cores_per_numa()
|
||||||
|
// * Kokkos::hwloc::get_available_threads_per_core()
|
||||||
|
;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Threads: " << num_threads << std::endl;
|
||||||
|
|
||||||
|
Kokkos::Threads::initialize( num_threads );
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::Threads::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#define THREADS_RANDOM_XORSHIFT64( num_draws ) \
|
||||||
|
TEST_F( threads, Random_XorShift64 ) { \
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Threads> >(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \
|
||||||
|
TEST_F( threads, Random_XorShift1024 ) { \
|
||||||
|
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Threads> >(num_draws); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define THREADS_SORT_UNSIGNED( size ) \
|
||||||
|
TEST_F( threads, SortUnsigned ) { \
|
||||||
|
Impl::test_sort< Kokkos::Threads, double >(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
THREADS_RANDOM_XORSHIFT64( 10240000 )
|
||||||
|
THREADS_RANDOM_XORSHIFT1024( 10130144 )
|
||||||
|
THREADS_SORT_UNSIGNED(171)
|
||||||
|
|
||||||
|
#undef THREADS_RANDOM_XORSHIFT64
|
||||||
|
#undef THREADS_RANDOM_XORSHIFT1024
|
||||||
|
#undef THREADS_SORT_UNSIGNED
|
||||||
|
|
||||||
|
#endif
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
|
||||||
50
lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp
Executable file
50
lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
::testing::InitGoogleTest(&argc,argv);
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
||||||
|
|
||||||
81
lib/kokkos/containers/performance_tests/Makefile
Executable file
81
lib/kokkos/containers/performance_tests/Makefile
Executable file
@ -0,0 +1,81 @@
|
|||||||
|
# Builds and runs the Kokkos containers performance tests, one executable per
# enabled backend (Cuda, Threads, OpenMP). Backend selection comes from the
# KOKKOS_INTERNAL_USE_* variables set by the included Makefile.kokkos.

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests

# These targets name actions, not files; declaring them phony keeps a stray
# file or directory called e.g. "test" or "clean" from masking them.
.PHONY: default build_all test test-cuda test-threads test-openmp clean

# Must stay the first rule so that a plain "make" builds everything.
default: build_all
	echo "End Build"


include $(KOKKOS_PATH)/Makefile.kokkos

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests

# Filled in below, one entry per enabled backend.
TEST_TARGETS =
TARGETS =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_Cuda
TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_Threads
TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o
TARGETS += KokkosContainers_PerformanceTest_OpenMP
TEST_TARGETS += test-openmp
endif

# Link rules

KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda

KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads

KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP

# Run rules

test-cuda: KokkosContainers_PerformanceTest_Cuda
	./KokkosContainers_PerformanceTest_Cuda

test-threads: KokkosContainers_PerformanceTest_Threads
	./KokkosContainers_PerformanceTest_Threads

test-openmp: KokkosContainers_PerformanceTest_OpenMP
	./KokkosContainers_PerformanceTest_OpenMP


build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
||||||
|
|
||||||
100
lib/kokkos/containers/performance_tests/TestCuda.cpp
Executable file
100
lib/kokkos/containers/performance_tests/TestCuda.cpp
Executable file
@ -0,0 +1,100 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
|
||||||
|
#include <TestGlobal2LocalIds.hpp>
|
||||||
|
|
||||||
|
#include <TestUnorderedMapPerformance.hpp>
|
||||||
|
|
||||||
|
namespace Performance {
|
||||||
|
|
||||||
|
class cuda : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
Kokkos::HostSpace::execution_space::initialize();
|
||||||
|
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||||
|
}
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::Cuda::finalize();
|
||||||
|
Kokkos::HostSpace::execution_space::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Runs the global-to-local id benchmark on Cuda for every id count from
// begin_id_size up to and including end_id_size, multiplying by id_step each
// round. Prints a CSV-style header first.
TEST_F( cuda, global_2_local)
{
  std::cout << "Cuda" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::Cuda>(num_ids);
    num_ids *= Performance::id_step;
  }
}
|
||||||
|
|
||||||
|
// Unordered-map performance run on Cuda with the boolean template flag set
// to true; results are labelled "cuda-near".
TEST_F( cuda, unordered_map_performance_near)
{
  static const char run_label[] = "cuda-near";
  Perf::run_performance_tests<Kokkos::Cuda,true>(run_label);
}
|
||||||
|
|
||||||
|
// Unordered-map performance run on Cuda with the boolean template flag set
// to false; results are labelled "cuda-far".
TEST_F( cuda, unordered_map_performance_far)
{
  static const char run_label[] = "cuda-far";
  Perf::run_performance_tests<Kokkos::Cuda,false>(run_label);
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||||
231
lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp
Executable file
231
lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp
Executable file
@ -0,0 +1,231 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
|
||||||
|
#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
// This test will simulate global ids
|
||||||
|
|
||||||
|
namespace Performance {
|
||||||
|
|
||||||
|
// Range of id counts swept by the benchmark drivers: start at 2^8 ids and
// multiply by id_step until 2^22 ids is reached.
static const unsigned begin_id_size = 1u << 8;
static const unsigned end_id_size   = 1u << 22;
static const unsigned id_step       = 2u;
|
||||||
|
|
||||||
|
// Overlays a 32-bit word with its four individual bytes so that the bytes
// can be addressed separately.
union helper
{
  uint32_t word;
  uint8_t  byte[sizeof(uint32_t)];
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
struct generate_ids
|
||||||
|
{
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef typename execution_space::size_type size_type;
|
||||||
|
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
|
||||||
|
|
||||||
|
local_id_view local_2_global;
|
||||||
|
|
||||||
|
generate_ids( local_id_view & ids)
|
||||||
|
: local_2_global(ids)
|
||||||
|
{
|
||||||
|
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(size_type i) const
|
||||||
|
{
|
||||||
|
|
||||||
|
helper x = {static_cast<uint32_t>(i)};
|
||||||
|
|
||||||
|
// shuffle the bytes of i to create a unique, semi-random global_id
|
||||||
|
x.word = ~x.word;
|
||||||
|
|
||||||
|
uint8_t tmp = x.byte[3];
|
||||||
|
x.byte[3] = x.byte[1];
|
||||||
|
x.byte[1] = tmp;
|
||||||
|
|
||||||
|
tmp = x.byte[2];
|
||||||
|
x.byte[2] = x.byte[0];
|
||||||
|
x.byte[0] = tmp;
|
||||||
|
|
||||||
|
local_2_global[i] = x.word;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
struct fill_map
|
||||||
|
{
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef typename execution_space::size_type size_type;
|
||||||
|
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
|
||||||
|
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
|
||||||
|
|
||||||
|
global_id_view global_2_local;
|
||||||
|
local_id_view local_2_global;
|
||||||
|
|
||||||
|
fill_map( global_id_view gIds, local_id_view lIds)
|
||||||
|
: global_2_local(gIds) , local_2_global(lIds)
|
||||||
|
{
|
||||||
|
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(size_type i) const
|
||||||
|
{
|
||||||
|
global_2_local.insert( local_2_global[i], i);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
struct find_test
|
||||||
|
{
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef typename execution_space::size_type size_type;
|
||||||
|
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
|
||||||
|
typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view;
|
||||||
|
|
||||||
|
global_id_view global_2_local;
|
||||||
|
local_id_view local_2_global;
|
||||||
|
|
||||||
|
typedef size_t value_type;
|
||||||
|
|
||||||
|
find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
|
||||||
|
: global_2_local(gIds) , local_2_global(lIds)
|
||||||
|
{
|
||||||
|
Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void init(value_type & v) const
|
||||||
|
{ v = 0; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void join(volatile value_type & dst, volatile value_type const & src) const
|
||||||
|
{ dst += src; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(size_type i, value_type & num_errors) const
|
||||||
|
{
|
||||||
|
uint32_t index = global_2_local.find( local_2_global[i] );
|
||||||
|
|
||||||
|
if ( global_2_local.value_at(index) != i) ++num_errors;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
void test_global_to_local_ids(unsigned num_ids)
|
||||||
|
{
|
||||||
|
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef typename execution_space::size_type size_type;
|
||||||
|
|
||||||
|
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
|
||||||
|
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
|
||||||
|
|
||||||
|
//size
|
||||||
|
std::cout << num_ids << ", ";
|
||||||
|
|
||||||
|
double elasped_time = 0;
|
||||||
|
Kokkos::Impl::Timer timer;
|
||||||
|
|
||||||
|
local_id_view local_2_global("local_ids", num_ids);
|
||||||
|
global_id_view global_2_local((3u*num_ids)/2u);
|
||||||
|
|
||||||
|
//create
|
||||||
|
elasped_time = timer.seconds();
|
||||||
|
std::cout << elasped_time << ", ";
|
||||||
|
timer.reset();
|
||||||
|
|
||||||
|
// generate unique ids
|
||||||
|
{
|
||||||
|
generate_ids<Device> gen(local_2_global);
|
||||||
|
}
|
||||||
|
Device::fence();
|
||||||
|
// generate
|
||||||
|
elasped_time = timer.seconds();
|
||||||
|
std::cout << elasped_time << ", ";
|
||||||
|
timer.reset();
|
||||||
|
|
||||||
|
{
|
||||||
|
fill_map<Device> fill(global_2_local, local_2_global);
|
||||||
|
}
|
||||||
|
Device::fence();
|
||||||
|
|
||||||
|
// fill
|
||||||
|
elasped_time = timer.seconds();
|
||||||
|
std::cout << elasped_time << ", ";
|
||||||
|
timer.reset();
|
||||||
|
|
||||||
|
|
||||||
|
size_t num_errors = 0;
|
||||||
|
for (int i=0; i<100; ++i)
|
||||||
|
{
|
||||||
|
find_test<Device> find(global_2_local, local_2_global,num_errors);
|
||||||
|
}
|
||||||
|
Device::fence();
|
||||||
|
|
||||||
|
// find
|
||||||
|
elasped_time = timer.seconds();
|
||||||
|
std::cout << elasped_time << std::endl;
|
||||||
|
|
||||||
|
ASSERT_EQ( num_errors, 0u);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Performance
|
||||||
|
|
||||||
|
|
||||||
|
#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
|
||||||
|
|
||||||
50
lib/kokkos/containers/performance_tests/TestMain.cpp
Executable file
50
lib/kokkos/containers/performance_tests/TestMain.cpp
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
::testing::InitGoogleTest(&argc,argv);
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
||||||
|
|
||||||
131
lib/kokkos/containers/performance_tests/TestOpenMP.cpp
Executable file
131
lib/kokkos/containers/performance_tests/TestOpenMP.cpp
Executable file
@ -0,0 +1,131 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
|
||||||
|
#include <TestGlobal2LocalIds.hpp>
|
||||||
|
#include <TestUnorderedMapPerformance.hpp>
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Performance {
|
||||||
|
|
||||||
|
class openmp : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
|
||||||
|
unsigned num_threads = 4;
|
||||||
|
|
||||||
|
if (Kokkos::hwloc::available()) {
|
||||||
|
num_threads = Kokkos::hwloc::get_available_numa_count()
|
||||||
|
* Kokkos::hwloc::get_available_cores_per_numa()
|
||||||
|
* Kokkos::hwloc::get_available_threads_per_core()
|
||||||
|
;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "OpenMP: " << num_threads << std::endl;
|
||||||
|
|
||||||
|
Kokkos::OpenMP::initialize( num_threads );
|
||||||
|
|
||||||
|
std::cout << "available threads: " << omp_get_max_threads() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::OpenMP::finalize();
|
||||||
|
|
||||||
|
omp_set_num_threads(1);
|
||||||
|
|
||||||
|
ASSERT_EQ( 1 , omp_get_max_threads() );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Runs test_global_to_local_ids on Kokkos::OpenMP for a geometric series of
/// sizes: begin_id_size, then multiplied by id_step while <= end_id_size.
TEST_F( openmp, global_2_local)
{
  std::cout << "OpenMP" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned id_count = Performance::begin_id_size;
  while (id_count <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::OpenMP>(id_count);
    id_count *= Performance::id_step;
  }
}
|
||||||
|
|
||||||
|
/// Runs the UnorderedMap performance tests on Kokkos::OpenMP with Near=true.
/// The output base name is "openmp-<thread count>-near".
TEST_F( openmp, unordered_map_performance_near)
{
  // Same thread-count rule as the fixture: 4 unless hwloc reports a topology.
  const unsigned num_openmp =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count()
            * Kokkos::hwloc::get_available_cores_per_numa()
            * Kokkos::hwloc::get_available_threads_per_core()
        : 4u;

  std::ostringstream base_file_name;
  base_file_name << "openmp-" << num_openmp << "-near";

  const std::string base_name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::OpenMP,true>(base_name);
}
|
||||||
|
|
||||||
|
/// Runs the UnorderedMap performance tests on Kokkos::OpenMP with Near=false.
/// The output base name is "openmp-<thread count>-far".
TEST_F( openmp, unordered_map_performance_far)
{
  // Same thread-count rule as the fixture: 4 unless hwloc reports a topology.
  const unsigned num_openmp =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count()
            * Kokkos::hwloc::get_available_cores_per_numa()
            * Kokkos::hwloc::get_available_threads_per_core()
        : 4u;

  std::ostringstream base_file_name;
  base_file_name << "openmp-" << num_openmp << "-far";

  const std::string base_name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::OpenMP,false>(base_name);
}
|
||||||
|
|
||||||
|
} // namespace Performance
|
||||||
|
|
||||||
126
lib/kokkos/containers/performance_tests/TestThreads.cpp
Executable file
126
lib/kokkos/containers/performance_tests/TestThreads.cpp
Executable file
@ -0,0 +1,126 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
#include <TestGlobal2LocalIds.hpp>
|
||||||
|
#include <TestUnorderedMapPerformance.hpp>
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
namespace Performance {
|
||||||
|
|
||||||
|
class threads : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
|
||||||
|
unsigned num_threads = 4;
|
||||||
|
|
||||||
|
if (Kokkos::hwloc::available()) {
|
||||||
|
num_threads = Kokkos::hwloc::get_available_numa_count() *
|
||||||
|
Kokkos::hwloc::get_available_cores_per_numa() *
|
||||||
|
Kokkos::hwloc::get_available_threads_per_core();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Threads: " << num_threads << std::endl;
|
||||||
|
|
||||||
|
Kokkos::Threads::initialize( num_threads );
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::Threads::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Runs test_global_to_local_ids on Kokkos::Threads for a geometric series of
/// sizes: begin_id_size, then multiplied by id_step while <= end_id_size.
TEST_F( threads, global_2_local)
{
  std::cout << "Threads" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned id_count = Performance::begin_id_size;
  while (id_count <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::Threads>(id_count);
    id_count *= Performance::id_step;
  }
}
|
||||||
|
|
||||||
|
/// Runs the UnorderedMap performance tests on Kokkos::Threads with Near=true.
/// The output base name is "threads-<thread count>-near".
TEST_F( threads, unordered_map_performance_near)
{
  // Same thread-count rule as the fixture: 4 unless hwloc reports a topology.
  const unsigned num_threads =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count()
            * Kokkos::hwloc::get_available_cores_per_numa()
            * Kokkos::hwloc::get_available_threads_per_core()
        : 4u;

  std::ostringstream base_file_name;
  base_file_name << "threads-" << num_threads << "-near";

  const std::string base_name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::Threads,true>(base_name);
}
|
||||||
|
|
||||||
|
/// Runs the UnorderedMap performance tests on Kokkos::Threads with Near=false.
/// The output base name is "threads-<thread count>-far".
TEST_F( threads, unordered_map_performance_far)
{
  // Same thread-count rule as the fixture: 4 unless hwloc reports a topology.
  const unsigned num_threads =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count()
            * Kokkos::hwloc::get_available_cores_per_numa()
            * Kokkos::hwloc::get_available_threads_per_core()
        : 4u;

  std::ostringstream base_file_name;
  base_file_name << "threads-" << num_threads << "-far";

  const std::string base_name = base_file_name.str();
  Perf::run_performance_tests<Kokkos::Threads,false>(base_name);
}
|
||||||
|
|
||||||
|
} // namespace Performance
|
||||||
|
|
||||||
|
|
||||||
262
lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp
Executable file
262
lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp
Executable file
@ -0,0 +1,262 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
||||||
|
#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Perf {
|
||||||
|
|
||||||
|
template <typename Device, bool Near>
|
||||||
|
struct UnorderedMapTest
|
||||||
|
{
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
|
||||||
|
typedef typename map_type::histogram_type histogram_type;
|
||||||
|
|
||||||
|
struct value_type {
|
||||||
|
uint32_t failed_count;
|
||||||
|
uint32_t max_list;
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t capacity;
|
||||||
|
uint32_t inserts;
|
||||||
|
uint32_t collisions;
|
||||||
|
double seconds;
|
||||||
|
map_type map;
|
||||||
|
histogram_type histogram;
|
||||||
|
|
||||||
|
UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions)
|
||||||
|
: capacity(arg_capacity)
|
||||||
|
, inserts(arg_inserts)
|
||||||
|
, collisions(arg_collisions)
|
||||||
|
, seconds(0)
|
||||||
|
, map(capacity)
|
||||||
|
, histogram(map.get_histogram())
|
||||||
|
{
|
||||||
|
Kokkos::Impl::Timer wall_clock ;
|
||||||
|
wall_clock.reset();
|
||||||
|
|
||||||
|
value_type v = {};
|
||||||
|
int loop_count = 0;
|
||||||
|
do {
|
||||||
|
++loop_count;
|
||||||
|
|
||||||
|
v = value_type();
|
||||||
|
Kokkos::parallel_reduce(inserts, *this, v);
|
||||||
|
|
||||||
|
if (v.failed_count > 0u) {
|
||||||
|
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ;
|
||||||
|
map.rehash( new_capacity );
|
||||||
|
}
|
||||||
|
} while (v.failed_count > 0u);
|
||||||
|
|
||||||
|
seconds = wall_clock.seconds();
|
||||||
|
|
||||||
|
switch (loop_count)
|
||||||
|
{
|
||||||
|
case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break;
|
||||||
|
case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break;
|
||||||
|
default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break;
|
||||||
|
}
|
||||||
|
std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush;
|
||||||
|
|
||||||
|
histogram.calculate();
|
||||||
|
Device::fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out)
|
||||||
|
{
|
||||||
|
metrics_out << map.capacity() << " , ";
|
||||||
|
metrics_out << inserts/collisions << " , ";
|
||||||
|
metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , ";
|
||||||
|
metrics_out << inserts << " , ";
|
||||||
|
metrics_out << (map.failed_insert() ? "true" : "false") << " , ";
|
||||||
|
metrics_out << collisions << " , ";
|
||||||
|
metrics_out << 1e9*(seconds/inserts) << " , ";
|
||||||
|
metrics_out << seconds << std::endl;
|
||||||
|
|
||||||
|
length_out << map.capacity() << " , ";
|
||||||
|
length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
||||||
|
length_out << collisions << " , ";
|
||||||
|
histogram.print_length(length_out);
|
||||||
|
|
||||||
|
distance_out << map.capacity() << " , ";
|
||||||
|
distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
||||||
|
distance_out << collisions << " , ";
|
||||||
|
histogram.print_distance(distance_out);
|
||||||
|
|
||||||
|
block_distance_out << map.capacity() << " , ";
|
||||||
|
block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
|
||||||
|
block_distance_out << collisions << " , ";
|
||||||
|
histogram.print_block_distance(block_distance_out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void init( value_type & v ) const
|
||||||
|
{
|
||||||
|
v.failed_count = 0;
|
||||||
|
v.max_list = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void join( volatile value_type & dst, const volatile value_type & src ) const
|
||||||
|
{
|
||||||
|
dst.failed_count += src.failed_count;
|
||||||
|
dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(uint32_t i, value_type & v) const
|
||||||
|
{
|
||||||
|
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
|
||||||
|
typename map_type::insert_result result = map.insert(key,i);
|
||||||
|
v.failed_count += !result.failed() ? 0 : 1;
|
||||||
|
v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position();
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
|
||||||
|
|
||||||
|
/// Runs the UnorderedMap insert benchmark for every configured collision
/// count, fill ratio and capacity (2^14 up to, but not including, 2^25) and
/// writes the results to four CSV files named
/// "<base_file_name>-{metrics,length,distance,block_distance}.csv".
///
/// The whole body is compiled only when KOKKOS_COLLECT_UNORDERED_MAP_METRICS
/// is defined; otherwise the function prints "skipping test" and returns.
///
/// \tparam Device          execution space the benchmark runs on.
/// \tparam Near            forwarded to UnorderedMapTest (key generation mode).
/// \param  base_file_name  prefix of the four output file names.
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
  std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
  std::string length_file_name = base_file_name + std::string("-length.csv");
  std::string distance_file_name = base_file_name + std::string("-distance.csv");
  std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv");

  std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out );
  std::ofstream length_out( length_file_name.c_str(), std::ofstream::out );
  std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out );
  std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out );


  /*
  const double test_ratios[] = {
     0.50
   , 0.75
   , 0.80
   , 0.85
   , 0.90
   , 0.95
   , 1.00
   , 1.25
   , 2.00
  };
  */

  const double test_ratios[] = { 1.00 };

  const int num_ratios = sizeof(test_ratios) / sizeof(test_ratios[0]);

  /*
  const uint32_t collisions[] {
      1
    , 4
    , 16
    , 64
  };
  */

  const uint32_t collisions[] = { 16 };

  const int num_collisions = sizeof(collisions) / sizeof(collisions[0]);

  // set up file headers
  metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl;

  // Each data row written by UnorderedMapTest::print() starts with three
  // fields (capacity, percent full, collisions) before the histogram buckets,
  // so the header must name all three to keep the bucket columns aligned.
  length_out << "Capacity , Percent Full , Collision Ratio , ";
  distance_out << "Capacity , Percent Full , Collision Ratio , ";
  block_distance_out << "Capacity , Percent Full , Collision Ratio , ";

  for (int i=0; i<100; ++i) {
    length_out << i << " , ";
    distance_out << i << " , ";
    block_distance_out << i << " , ";
  }

  // NOTE(review): backspaces do not erase the trailing " , " in a file; kept
  // as-is because the histogram print_* routines (not visible here) may end
  // their rows the same way -- confirm before changing the row terminator.
  length_out << "\b\b\b " << std::endl;
  distance_out << "\b\b\b " << std::endl;
  block_distance_out << "\b\b\b " << std::endl;

  Kokkos::Impl::Timer wall_clock ;
  for (int i=0; i < num_collisions ; ++i) {
    wall_clock.reset();
    std::cout << "Collisions: " << collisions[i] << std::endl;
    for (int j = 0; j < num_ratios; ++j) {
      std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "% " << std::flush;
      for (uint32_t capacity = 1<<14; capacity < 1<<25; capacity = capacity << 1) {
        // Number of distinct keys to aim for at this fill ratio.
        uint32_t inserts = static_cast<uint32_t>(test_ratios[j]*(capacity));
        std::cout << capacity << std::flush;
        // The constructor runs (and times) the insert passes.
        UnorderedMapTest<Device, Near> test(capacity, inserts*collisions[i], collisions[i]);
        Device::fence();
        test.print(metrics_out, length_out, distance_out, block_distance_out);
      }
      std::cout << "\b\b " << std::endl;

    }
    std::cout << " " << wall_clock.seconds() << " secs" << std::endl;
  }
  metrics_out.close();
  length_out.close();
  distance_out.close();
  block_distance_out.close();
#else
  (void)base_file_name;
  std::cout << "skipping test" << std::endl;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Perf
|
||||||
|
|
||||||
|
#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
|
||||||
437
lib/kokkos/containers/src/Kokkos_Bitset.hpp
Executable file
437
lib/kokkos/containers/src/Kokkos_Bitset.hpp
Executable file
@ -0,0 +1,437 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_BITSET_HPP
|
||||||
|
#define KOKKOS_BITSET_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_Functional.hpp>
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Bitset_impl.hpp>
|
||||||
|
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template <typename Device = Kokkos::DefaultExecutionSpace >
|
||||||
|
class Bitset;
|
||||||
|
|
||||||
|
template <typename Device = Kokkos::DefaultExecutionSpace >
|
||||||
|
class ConstBitset;
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||||
|
|
||||||
|
|
||||||
|
/// A thread safe view to a bitset
|
||||||
|
template <typename Device>
|
||||||
|
class Bitset
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef unsigned size_type;
|
||||||
|
|
||||||
|
enum { BIT_SCAN_REVERSE = 1u };
|
||||||
|
enum { MOVE_HINT_BACKWARD = 2u };
|
||||||
|
|
||||||
|
enum {
|
||||||
|
BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u
|
||||||
|
, BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE
|
||||||
|
, BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD
|
||||||
|
, BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
|
||||||
|
enum { block_mask = block_size-1u };
|
||||||
|
enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
/// constructor
|
||||||
|
/// arg_size := number of bit in set
|
||||||
|
Bitset(unsigned arg_size = 0u)
|
||||||
|
: m_size(arg_size)
|
||||||
|
, m_last_block_mask(0u)
|
||||||
|
, m_blocks("Bitset", ((m_size + block_mask) >> block_shift) )
|
||||||
|
{
|
||||||
|
for (int i=0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
|
||||||
|
m_last_block_mask |= 1u << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// assignment
|
||||||
|
Bitset<Device> & operator = (Bitset<Device> const & rhs)
|
||||||
|
{
|
||||||
|
this->m_size = rhs.m_size;
|
||||||
|
this->m_last_block_mask = rhs.m_last_block_mask;
|
||||||
|
this->m_blocks = rhs.m_blocks;
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// copy constructor
|
||||||
|
Bitset( Bitset<Device> const & rhs)
|
||||||
|
: m_size( rhs.m_size )
|
||||||
|
, m_last_block_mask( rhs.m_last_block_mask )
|
||||||
|
, m_blocks( rhs.m_blocks )
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// number of bits in the set
|
||||||
|
/// can be call from the host or the device
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
unsigned size() const
|
||||||
|
{ return m_size; }
|
||||||
|
|
||||||
|
/// number of bits which are set to 1
|
||||||
|
/// can only be called from the host
|
||||||
|
unsigned count() const
|
||||||
|
{
|
||||||
|
Impl::BitsetCount< Bitset<Device> > f(*this);
|
||||||
|
return f.apply();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// set all bits to 1
|
||||||
|
/// can only be called from the host
|
||||||
|
void set()
|
||||||
|
{
|
||||||
|
Kokkos::deep_copy(m_blocks, ~0u );
|
||||||
|
|
||||||
|
if (m_last_block_mask) {
|
||||||
|
//clear the unused bits in the last block
|
||||||
|
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||||
|
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// set all bits to 0
|
||||||
|
/// can only be called from the host
|
||||||
|
void reset()
|
||||||
|
{
|
||||||
|
Kokkos::deep_copy(m_blocks, 0u );
|
||||||
|
}
|
||||||
|
|
||||||
|
/// set all bits to 0
|
||||||
|
/// can only be called from the host
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
Kokkos::deep_copy(m_blocks, 0u );
|
||||||
|
}
|
||||||
|
|
||||||
|
/// set i'th bit to 1
|
||||||
|
/// can only be called from the device
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool set( unsigned i ) const
|
||||||
|
{
|
||||||
|
if ( i < m_size ) {
|
||||||
|
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
|
||||||
|
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||||
|
|
||||||
|
return !( atomic_fetch_or( block_ptr, mask ) & mask );
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// set i'th bit to 0
|
||||||
|
/// can only be called from the device
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool reset( unsigned i ) const
|
||||||
|
{
|
||||||
|
if ( i < m_size ) {
|
||||||
|
unsigned * block_ptr = &m_blocks[ i >> block_shift ];
|
||||||
|
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||||
|
|
||||||
|
return atomic_fetch_and( block_ptr, ~mask ) & mask;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// return true if the i'th bit set to 1
|
||||||
|
/// can only be called from the device
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool test( unsigned i ) const
|
||||||
|
{
|
||||||
|
if ( i < m_size ) {
|
||||||
|
const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]);
|
||||||
|
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||||
|
return block & mask;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// used with find_any_set_near or find_any_unset_near functions
|
||||||
|
/// returns the max number of times those functions should be call
|
||||||
|
/// when searching for an available bit
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
unsigned max_hint() const
|
||||||
|
{
|
||||||
|
return m_blocks.dimension_0();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// find a bit set to 1 near the hint
|
||||||
|
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
|
||||||
|
/// and if result.first is false the result.second is a new hint
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
|
||||||
|
{
|
||||||
|
const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
|
||||||
|
const unsigned offset = hint & block_mask;
|
||||||
|
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
||||||
|
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
|
||||||
|
|
||||||
|
return find_any_helper(block_idx, offset, block, scan_direction);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// find a bit set to 0 near the hint
|
||||||
|
/// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found
|
||||||
|
/// and if result.first is false the result.second is a new hint
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
|
||||||
|
{
|
||||||
|
const unsigned block_idx = hint >> block_shift;
|
||||||
|
const unsigned offset = hint & block_mask;
|
||||||
|
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
||||||
|
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
|
||||||
|
|
||||||
|
return find_any_helper(block_idx, offset, block, scan_direction);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const
|
||||||
|
{
|
||||||
|
Kokkos::pair<bool, unsigned> result( block > 0u, 0);
|
||||||
|
|
||||||
|
if (!result.first) {
|
||||||
|
result.second = update_hint( block_idx, offset, scan_direction );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result.second = scan_block( (block_idx << block_shift)
|
||||||
|
, offset
|
||||||
|
, block
|
||||||
|
, scan_direction
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Locate a set bit of \c block, starting the search at \c offset.
///
/// The block is rotated right by the (possibly adjusted) offset, scanned
/// with Impl::bit_scan_forward or Impl::bit_scan_reverse depending on the
/// BIT_SCAN_REVERSE flag, and the resulting position is rotated back
/// (added to the offset and masked with block_mask) before \c block_start
/// is added.
///
/// \param block_start index of the first bit of the block
/// \param offset starting bit offset within the block
/// \param block block contents; the caller only invokes this for nonzero blocks
/// \param scan_direction flag word; only BIT_SCAN_REVERSE is examined here
KOKKOS_FORCEINLINE_FUNCTION
unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const
{
  const bool reverse_scan = (scan_direction & BIT_SCAN_REVERSE) != 0;

  if (reverse_scan) {
    // Adding block_mask and masking again shifts the start position
    // (presumably back by one bit, modulo the block size -- this assumes
    // block_mask == block_size - 1; confirm against the class enums).
    offset = (offset + block_mask) & block_mask;
  }

  block = Impl::rotate_right(block, offset);

  const unsigned rotated_pos = reverse_scan ? Impl::bit_scan_reverse(block)
                                            : Impl::bit_scan_forward(block);

  // Undo the rotation, then translate into a global bit index.
  return ((rotated_pos + offset) & block_mask) + block_start;
}
|
||||||
|
|
||||||
|
/// Build a new hint that points at a neighbouring block.
///
/// The block index moves by -1 when MOVE_HINT_BACKWARD is set in
/// \c scan_direction and by +1 otherwise, wrapping around at both ends of
/// the block array.  The bit offset inside the block is kept unchanged.
///
/// \return new block index times block_size, plus \c offset.
KOKKOS_FORCEINLINE_FUNCTION
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
{
  if (scan_direction & MOVE_HINT_BACKWARD) {
    block_idx -= 1;
  }
  else {
    block_idx += 1;
  }

  // Stepped before the first block: wrap to the last one.
  if (block_idx < 0) {
    block_idx = m_blocks.dimension_0() - 1;
  }

  // Stepped past the last block: wrap to the first one.
  if (!(block_idx < static_cast<long long>(m_blocks.dimension_0()))) {
    block_idx = 0;
  }

  return static_cast<unsigned>(block_idx)*block_size + offset;
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
unsigned m_size;
|
||||||
|
unsigned m_last_block_mask;
|
||||||
|
View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
|
||||||
|
|
||||||
|
private:
|
||||||
|
template <typename DDevice>
|
||||||
|
friend class Bitset;
|
||||||
|
|
||||||
|
template <typename DDevice>
|
||||||
|
friend class ConstBitset;
|
||||||
|
|
||||||
|
template <typename Bitset>
|
||||||
|
friend struct Impl::BitsetCount;
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||||
|
};
|
||||||
|
|
||||||
|
/// a thread-safe view to a const bitset
|
||||||
|
/// i.e. can only test bits
|
||||||
|
template <typename Device>
|
||||||
|
class ConstBitset
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef unsigned size_type;
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
|
||||||
|
enum { block_mask = block_size -1u };
|
||||||
|
enum { block_shift = static_cast<int>(Impl::power_of_two<block_size>::value) };
|
||||||
|
|
||||||
|
public:
|
||||||
|
ConstBitset()
|
||||||
|
: m_size (0)
|
||||||
|
{}
|
||||||
|
|
||||||
|
ConstBitset(Bitset<Device> const& rhs)
|
||||||
|
: m_size(rhs.m_size)
|
||||||
|
, m_blocks(rhs.m_blocks)
|
||||||
|
{}
|
||||||
|
|
||||||
|
ConstBitset(ConstBitset<Device> const& rhs)
|
||||||
|
: m_size( rhs.m_size )
|
||||||
|
, m_blocks( rhs.m_blocks )
|
||||||
|
{}
|
||||||
|
|
||||||
|
ConstBitset<Device> & operator = (Bitset<Device> const & rhs)
|
||||||
|
{
|
||||||
|
this->m_size = rhs.m_size;
|
||||||
|
this->m_blocks = rhs.m_blocks;
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs)
|
||||||
|
{
|
||||||
|
this->m_size = rhs.m_size;
|
||||||
|
this->m_blocks = rhs.m_blocks;
|
||||||
|
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
unsigned size() const
|
||||||
|
{
|
||||||
|
return m_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned count() const
|
||||||
|
{
|
||||||
|
Impl::BitsetCount< ConstBitset<Device> > f(*this);
|
||||||
|
return f.apply();
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool test( unsigned i ) const
|
||||||
|
{
|
||||||
|
if ( i < m_size ) {
|
||||||
|
const unsigned block = m_blocks[ i >> block_shift ];
|
||||||
|
const unsigned mask = 1u << static_cast<int>( i & block_mask );
|
||||||
|
return block & mask;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
unsigned m_size;
|
||||||
|
View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
|
||||||
|
|
||||||
|
private:
|
||||||
|
template <typename DDevice>
|
||||||
|
friend class ConstBitset;
|
||||||
|
|
||||||
|
template <typename Bitset>
|
||||||
|
friend struct Impl::BitsetCount;
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
|
||||||
|
{
|
||||||
|
if (dst.size() != src.size()) {
|
||||||
|
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||||
|
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
||||||
|
{
|
||||||
|
if (dst.size() != src.size()) {
|
||||||
|
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||||
|
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename DstDevice, typename SrcDevice>
|
||||||
|
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
||||||
|
{
|
||||||
|
if (dst.size() != src.size()) {
|
||||||
|
throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||||
|
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif //KOKKOS_BITSET_HPP
|
||||||
840
lib/kokkos/containers/src/Kokkos_DualView.hpp
Executable file
840
lib/kokkos/containers/src/Kokkos_DualView.hpp
Executable file
@ -0,0 +1,840 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// \file Kokkos_DualView.hpp
|
||||||
|
/// \brief Declaration and definition of Kokkos::DualView.
|
||||||
|
///
|
||||||
|
/// This header file declares and defines Kokkos::DualView and its
|
||||||
|
/// related nonmember functions.
|
||||||
|
|
||||||
|
#ifndef KOKKOS_DUALVIEW_HPP
|
||||||
|
#define KOKKOS_DUALVIEW_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/* \class DualView
|
||||||
|
* \brief Container to manage mirroring a Kokkos::View that lives
|
||||||
|
* in device memory with a Kokkos::View that lives in host memory.
|
||||||
|
*
|
||||||
|
* This class provides capabilities to manage data which exists in two
|
||||||
|
* memory spaces at the same time. It keeps views of the same layout
|
||||||
|
* on two memory spaces as well as modified flags for both
|
||||||
|
* allocations. Users are responsible for setting the modified flags
* manually if they change the data in either memory space, by calling
* the modify() method templated on the device where they modified the
* data. Users may synchronize data by calling the sync() function,
* templated on the device towards which they want to synchronize
* (i.e., the target of the one-way copy operation).
|
||||||
|
*
|
||||||
|
* The DualView class also provides convenience methods such as
|
||||||
|
* realloc, resize and capacity which call the appropriate methods of
|
||||||
|
* the underlying Kokkos::View objects.
|
||||||
|
*
|
||||||
|
* The four template arguments are the same as those of Kokkos::View.
|
||||||
|
* (Please refer to that class' documentation for a detailed
|
||||||
|
* description.)
|
||||||
|
*
|
||||||
|
* \tparam DataType The type of the entries stored in the container.
|
||||||
|
*
|
||||||
|
* \tparam Layout The array's layout in memory.
|
||||||
|
*
|
||||||
|
* \tparam Device The Kokkos Device type. If its memory space is
|
||||||
|
* not the same as the host's memory space, then DualView will
|
||||||
|
* contain two separate Views: one in device memory, and one in
|
||||||
|
* host memory. Otherwise, DualView will only store one View.
|
||||||
|
*
|
||||||
|
* \tparam MemoryTraits (optional) The user's intended memory access
|
||||||
|
* behavior. Please see the documentation of Kokkos::View for
|
||||||
|
* examples. The default suffices for most users.
|
||||||
|
*/
|
||||||
|
template< class DataType ,
|
||||||
|
class Arg1Type = void ,
|
||||||
|
class Arg2Type = void ,
|
||||||
|
class Arg3Type = void>
|
||||||
|
class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//! \name Typedefs for device types and various Kokkos::View specializations.
|
||||||
|
//@{
|
||||||
|
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
|
||||||
|
|
||||||
|
//! The Kokkos Host Device type;
|
||||||
|
typedef typename traits::host_mirror_space host_mirror_space ;
|
||||||
|
|
||||||
|
//! The type of a Kokkos::View on the device.
|
||||||
|
typedef View< typename traits::data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::device_type ,
|
||||||
|
typename traits::memory_traits > t_dev ;
|
||||||
|
|
||||||
|
/// \typedef t_host
|
||||||
|
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
|
||||||
|
typedef typename t_dev::HostMirror t_host ;
|
||||||
|
|
||||||
|
//! The type of a const View on the device.
|
||||||
|
typedef View< typename traits::const_data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::device_type ,
|
||||||
|
typename traits::memory_traits > t_dev_const ;
|
||||||
|
|
||||||
|
/// \typedef t_host_const
|
||||||
|
/// \brief The type of a const View host mirror of \c t_dev_const.
|
||||||
|
typedef typename t_dev_const::HostMirror t_host_const;
|
||||||
|
|
||||||
|
//! The type of a const, random-access View on the device.
|
||||||
|
typedef View< typename traits::const_data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::device_type ,
|
||||||
|
MemoryRandomAccess > t_dev_const_randomread ;
|
||||||
|
|
||||||
|
/// \typedef t_host_const_randomread
|
||||||
|
/// \brief The type of a const, random-access View host mirror of
|
||||||
|
/// \c t_dev_const_randomread.
|
||||||
|
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
|
||||||
|
|
||||||
|
//! The type of an unmanaged View on the device.
|
||||||
|
typedef View< typename traits::data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::device_type ,
|
||||||
|
MemoryUnmanaged> t_dev_um;
|
||||||
|
|
||||||
|
//! The type of an unmanaged View host mirror of \c t_dev_um.
|
||||||
|
typedef View< typename t_host::data_type ,
|
||||||
|
typename t_host::array_layout ,
|
||||||
|
typename t_host::device_type ,
|
||||||
|
MemoryUnmanaged> t_host_um;
|
||||||
|
|
||||||
|
//! The type of a const unmanaged View on the device.
|
||||||
|
typedef View< typename traits::const_data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::device_type ,
|
||||||
|
MemoryUnmanaged> t_dev_const_um;
|
||||||
|
|
||||||
|
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
|
||||||
|
typedef View<typename t_host::const_data_type,
|
||||||
|
typename t_host::array_layout,
|
||||||
|
typename t_host::device_type,
|
||||||
|
MemoryUnmanaged> t_host_const_um;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//! \name The two View instances.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
t_dev d_view;
|
||||||
|
t_host h_view;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//! \name Counters to keep track of changes ("modified" flags)
|
||||||
|
//@{
|
||||||
|
|
||||||
|
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
|
||||||
|
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//! \name Constructors
|
||||||
|
//@{
|
||||||
|
|
||||||
|
/// \brief Empty constructor.
|
||||||
|
///
|
||||||
|
/// Both device and host View objects are constructed using their
|
||||||
|
/// default constructors. The "modified" flags are both initialized
|
||||||
|
/// to "unmodified."
|
||||||
|
DualView () :
|
||||||
|
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
|
||||||
|
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Constructor that allocates View objects on both host and device.
|
||||||
|
///
|
||||||
|
/// This constructor works like the analogous constructor of View.
|
||||||
|
/// The first argument is a string label, which is entirely for your
|
||||||
|
/// benefit. (Different DualView objects may have the same label if
|
||||||
|
/// you like.) The arguments that follow are the dimensions of the
|
||||||
|
/// View objects. For example, if the View has three dimensions,
|
||||||
|
/// the first three integer arguments will be nonzero, and you may
|
||||||
|
/// omit the integer arguments that follow.
|
||||||
|
DualView (const std::string& label,
|
||||||
|
const size_t n0 = 0,
|
||||||
|
const size_t n1 = 0,
|
||||||
|
const size_t n2 = 0,
|
||||||
|
const size_t n3 = 0,
|
||||||
|
const size_t n4 = 0,
|
||||||
|
const size_t n5 = 0,
|
||||||
|
const size_t n6 = 0,
|
||||||
|
const size_t n7 = 0)
|
||||||
|
: d_view (label, n0, n1, n2, n3, n4, n5, n6, n7)
|
||||||
|
, h_view (create_mirror_view (d_view)) // without UVM, host View mirrors
|
||||||
|
, modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device"))
|
||||||
|
, modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||||
|
{}
|
||||||
|
|
||||||
|
//! Copy constructor (shallow copy)
|
||||||
|
template<class SS, class LS, class DS, class MS>
|
||||||
|
DualView (const DualView<SS,LS,DS,MS>& src) :
|
||||||
|
d_view (src.d_view),
|
||||||
|
h_view (src.h_view),
|
||||||
|
modified_device (src.modified_device),
|
||||||
|
modified_host (src.modified_host)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Create DualView from existing device and host View objects.
|
||||||
|
///
|
||||||
|
/// This constructor assumes that the device and host View objects
|
||||||
|
/// are synchronized. You, the caller, are responsible for making
|
||||||
|
/// sure this is the case before calling this constructor. After
|
||||||
|
/// this constructor returns, you may use DualView's sync() and
|
||||||
|
/// modify() methods to ensure synchronization of the View objects.
|
||||||
|
///
|
||||||
|
/// \param d_view_ Device View
|
||||||
|
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
|
||||||
|
DualView (const t_dev& d_view_, const t_host& h_view_) :
|
||||||
|
d_view (d_view_),
|
||||||
|
h_view (h_view_),
|
||||||
|
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
|
||||||
|
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||||
|
{
|
||||||
|
Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
|
||||||
|
}
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//! \name Methods for synchronizing, marking as modified, and getting Views.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
/// \brief Return a View on a specific device \c Device.
|
||||||
|
///
|
||||||
|
/// Please don't be afraid of the if_c expression in the return
|
||||||
|
/// value's type. That just tells the method what the return type
|
||||||
|
/// should be: t_dev if the \c Device template parameter matches
|
||||||
|
/// this DualView's device type, else t_host.
|
||||||
|
///
|
||||||
|
/// For example, suppose you create a DualView on Cuda, like this:
|
||||||
|
/// \code
|
||||||
|
/// typedef Kokkos::DualView<float, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type;
|
||||||
|
/// dual_view_type DV ("my dual view", 100);
|
||||||
|
/// \endcode
|
||||||
|
/// If you want to get the CUDA device View, do this:
|
||||||
|
/// \code
|
||||||
|
/// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
|
||||||
|
/// \endcode
|
||||||
|
/// and if you want to get the host mirror of that View, do this:
|
||||||
|
/// \code
|
||||||
|
/// typedef typename Kokkos::HostSpace::execution_space host_device_type;
|
||||||
|
/// typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
|
||||||
|
/// \endcode
|
||||||
|
template< class Device >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
const typename Impl::if_c<
|
||||||
|
Impl::is_same<typename t_dev::memory_space,
|
||||||
|
typename Device::memory_space>::value,
|
||||||
|
t_dev,
|
||||||
|
t_host>::type& view () const
|
||||||
|
{
|
||||||
|
return Impl::if_c<
|
||||||
|
Impl::is_same<
|
||||||
|
typename t_dev::memory_space,
|
||||||
|
typename Device::memory_space>::value,
|
||||||
|
t_dev,
|
||||||
|
t_host >::select (d_view , h_view);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Update data on device or host only if data in the other
|
||||||
|
/// space has been marked as modified.
|
||||||
|
///
|
||||||
|
/// If \c Device is the same as this DualView's device type, then
|
||||||
|
/// copy data from host to device. Otherwise, copy data from device
|
||||||
|
/// to host. In either case, only copy if the source of the copy
|
||||||
|
/// has been modified.
|
||||||
|
///
|
||||||
|
/// This is a one-way synchronization only. If the target of the
|
||||||
|
/// copy has been modified, this operation will discard those
|
||||||
|
/// modifications. It will also reset both device and host modified
|
||||||
|
/// flags.
|
||||||
|
///
|
||||||
|
/// \note This method doesn't know on its own whether you modified
|
||||||
|
/// the data in either View. You must manually mark modified data
|
||||||
|
/// as modified, by calling the modify() method with the
|
||||||
|
/// appropriate template parameter.
|
||||||
|
template<class Device>
|
||||||
|
void sync( const typename Impl::enable_if<
|
||||||
|
( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
|
||||||
|
( Impl::is_same< Device , int>::value)
|
||||||
|
, int >::type& = 0)
|
||||||
|
{
|
||||||
|
const unsigned int dev =
|
||||||
|
Impl::if_c<
|
||||||
|
Impl::is_same<
|
||||||
|
typename t_dev::memory_space,
|
||||||
|
typename Device::memory_space>::value ,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int>::select (1, 0);
|
||||||
|
|
||||||
|
if (dev) { // if Device is the same as DualView's device type
|
||||||
|
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
||||||
|
deep_copy (d_view, h_view);
|
||||||
|
modified_host() = modified_device() = 0;
|
||||||
|
}
|
||||||
|
} else { // hopefully Device is the same as DualView's host type
|
||||||
|
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
||||||
|
deep_copy (h_view, d_view);
|
||||||
|
modified_host() = modified_device() = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
|
||||||
|
t_dev::execution_space::fence();
|
||||||
|
t_host::execution_space::fence();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Device>
|
||||||
|
void sync ( const typename Impl::enable_if<
|
||||||
|
( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
|
||||||
|
( Impl::is_same< Device , int>::value)
|
||||||
|
, int >::type& = 0 )
|
||||||
|
{
|
||||||
|
const unsigned int dev =
|
||||||
|
Impl::if_c<
|
||||||
|
Impl::is_same<
|
||||||
|
typename t_dev::memory_space,
|
||||||
|
typename Device::memory_space>::value,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int>::select (1, 0);
|
||||||
|
if (dev) { // if Device is the same as DualView's device type
|
||||||
|
if ((modified_host () > 0) && (modified_host () >= modified_device ())) {
|
||||||
|
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
|
||||||
|
}
|
||||||
|
} else { // hopefully Device is the same as DualView's host type
|
||||||
|
if ((modified_device () > 0) && (modified_device () >= modified_host ())) {
|
||||||
|
Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// \brief Mark data as modified on the given device \c Device.
|
||||||
|
///
|
||||||
|
/// If \c Device is the same as this DualView's device type, then
|
||||||
|
/// mark the device's data as modified. Otherwise, mark the host's
|
||||||
|
/// data as modified.
|
||||||
|
template<class Device>
|
||||||
|
void modify () {
|
||||||
|
const unsigned int dev =
|
||||||
|
Impl::if_c<
|
||||||
|
Impl::is_same<
|
||||||
|
typename t_dev::memory_space,
|
||||||
|
typename Device::memory_space>::value,
|
||||||
|
unsigned int,
|
||||||
|
unsigned int>::select (1, 0);
|
||||||
|
|
||||||
|
if (dev) { // if Device is the same as DualView's device type
|
||||||
|
// Increment the device's modified count.
|
||||||
|
modified_device () = (modified_device () > modified_host () ?
|
||||||
|
modified_device () : modified_host ()) + 1;
|
||||||
|
} else { // hopefully Device is the same as DualView's host type
|
||||||
|
// Increment the host's modified count.
|
||||||
|
modified_host () = (modified_device () > modified_host () ?
|
||||||
|
modified_device () : modified_host ()) + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//! \name Methods for reallocating or resizing the View objects.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
/// \brief Reallocate both View objects.
|
||||||
|
///
|
||||||
|
/// This discards any existing contents of the objects, and resets
|
||||||
|
/// their modified flags. It does <i>not</i> copy the old contents
|
||||||
|
/// of either View into the new View objects.
|
||||||
|
void realloc( const size_t n0 = 0 ,
|
||||||
|
const size_t n1 = 0 ,
|
||||||
|
const size_t n2 = 0 ,
|
||||||
|
const size_t n3 = 0 ,
|
||||||
|
const size_t n4 = 0 ,
|
||||||
|
const size_t n5 = 0 ,
|
||||||
|
const size_t n6 = 0 ,
|
||||||
|
const size_t n7 = 0 ) {
|
||||||
|
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||||
|
h_view = create_mirror_view( d_view );
|
||||||
|
|
||||||
|
/* Reset dirty flags */
|
||||||
|
modified_device() = modified_host() = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Resize both views, copying old contents into new if necessary.
|
||||||
|
///
|
||||||
|
/// This method only copies the old contents into the new View
|
||||||
|
/// objects for the device which was last marked as modified.
|
||||||
|
void resize( const size_t n0 = 0 ,
|
||||||
|
const size_t n1 = 0 ,
|
||||||
|
const size_t n2 = 0 ,
|
||||||
|
const size_t n3 = 0 ,
|
||||||
|
const size_t n4 = 0 ,
|
||||||
|
const size_t n5 = 0 ,
|
||||||
|
const size_t n6 = 0 ,
|
||||||
|
const size_t n7 = 0 ) {
|
||||||
|
if(modified_device() >= modified_host()) {
|
||||||
|
/* Resize on Device */
|
||||||
|
::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||||
|
h_view = create_mirror_view( d_view );
|
||||||
|
|
||||||
|
/* Mark Device copy as modified */
|
||||||
|
modified_device() = modified_device()+1;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
/* Realloc on Device */
|
||||||
|
|
||||||
|
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||||
|
t_host temp_view = create_mirror_view( d_view );
|
||||||
|
|
||||||
|
/* Remap on Host */
|
||||||
|
Kokkos::deep_copy( temp_view , h_view );
|
||||||
|
|
||||||
|
h_view = temp_view;
|
||||||
|
|
||||||
|
/* Mark Host copy as modified */
|
||||||
|
modified_host() = modified_host()+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//! \name Methods for getting capacity, stride, or dimension(s).
|
||||||
|
//@{
|
||||||
|
|
||||||
|
//! The allocation size (same as Kokkos::View::capacity).
|
||||||
|
size_t capacity() const {
|
||||||
|
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||||
|
return d_view.span();
|
||||||
|
#else
|
||||||
|
return d_view.capacity();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Get stride(s) for each dimension.
|
||||||
|
template< typename iType>
|
||||||
|
void stride(iType* stride_) const {
|
||||||
|
d_view.stride(stride_);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* \brief return size of dimension 0 */
|
||||||
|
size_t dimension_0() const {return d_view.dimension_0();}
|
||||||
|
/* \brief return size of dimension 1 */
|
||||||
|
size_t dimension_1() const {return d_view.dimension_1();}
|
||||||
|
/* \brief return size of dimension 2 */
|
||||||
|
size_t dimension_2() const {return d_view.dimension_2();}
|
||||||
|
/* \brief return size of dimension 3 */
|
||||||
|
size_t dimension_3() const {return d_view.dimension_3();}
|
||||||
|
/* \brief return size of dimension 4 */
|
||||||
|
size_t dimension_4() const {return d_view.dimension_4();}
|
||||||
|
/* \brief return size of dimension 5 */
|
||||||
|
size_t dimension_5() const {return d_view.dimension_5();}
|
||||||
|
/* \brief return size of dimension 6 */
|
||||||
|
size_t dimension_6() const {return d_view.dimension_6();}
|
||||||
|
/* \brief return size of dimension 7 */
|
||||||
|
size_t dimension_7() const {return d_view.dimension_7();}
|
||||||
|
|
||||||
|
//@}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
//
|
||||||
|
// Partial specializations of Kokkos::subview() for DualView objects.
|
||||||
|
//
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||||
|
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||||
|
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
||||||
|
>
|
||||||
|
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
||||||
|
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||||
|
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
|
||||||
|
|
||||||
|
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
|
||||||
|
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
|
||||||
|
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
|
||||||
|
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
|
||||||
|
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
|
||||||
|
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
|
||||||
|
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
|
||||||
|
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
|
||||||
|
|
||||||
|
// The source view rank must be equal to the input argument rank
|
||||||
|
// Once a void argument is encountered all subsequent arguments must be void.
|
||||||
|
enum { InputRank =
|
||||||
|
Impl::StaticAssert<( SrcViewType::rank ==
|
||||||
|
( V0 ? 0 : (
|
||||||
|
V1 ? 1 : (
|
||||||
|
V2 ? 2 : (
|
||||||
|
V3 ? 3 : (
|
||||||
|
V4 ? 4 : (
|
||||||
|
V5 ? 5 : (
|
||||||
|
V6 ? 6 : (
|
||||||
|
V7 ? 7 : 8 ))))))) ))
|
||||||
|
&&
|
||||||
|
( SrcViewType::rank ==
|
||||||
|
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
|
||||||
|
>::value ? SrcViewType::rank : 0 };
|
||||||
|
|
||||||
|
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
|
||||||
|
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
|
||||||
|
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
|
||||||
|
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
|
||||||
|
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
|
||||||
|
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
|
||||||
|
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
|
||||||
|
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
|
||||||
|
|
||||||
|
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
|
||||||
|
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
|
||||||
|
|
||||||
|
// Reverse
|
||||||
|
enum { R0_rev = 0 == InputRank ? 0u : (
|
||||||
|
1 == InputRank ? unsigned(R0) : (
|
||||||
|
2 == InputRank ? unsigned(R1) : (
|
||||||
|
3 == InputRank ? unsigned(R2) : (
|
||||||
|
4 == InputRank ? unsigned(R3) : (
|
||||||
|
5 == InputRank ? unsigned(R4) : (
|
||||||
|
6 == InputRank ? unsigned(R5) : (
|
||||||
|
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
|
||||||
|
|
||||||
|
typedef typename SrcViewType::array_layout SrcViewLayout ;
|
||||||
|
|
||||||
|
// Choose array layout, attempting to preserve original layout if at all possible.
|
||||||
|
typedef typename Impl::if_c<
|
||||||
|
( // Same Layout IF
|
||||||
|
// OutputRank 0
|
||||||
|
( OutputRank == 0 )
|
||||||
|
||
|
||||||
|
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||||
|
// because single stride one or second index has a stride.
|
||||||
|
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
|
||||||
|
||
|
||||||
|
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
|
||||||
|
// because single stride one or second index has a stride.
|
||||||
|
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
|
||||||
|
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
|
||||||
|
|
||||||
|
// Choose data type as a purely dynamic rank array to accommodate a runtime range.
|
||||||
|
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
|
||||||
|
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
|
||||||
|
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
|
||||||
|
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
|
||||||
|
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
|
||||||
|
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
|
||||||
|
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
|
||||||
|
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
|
||||||
|
typename SrcViewType::value_type ********
|
||||||
|
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
|
||||||
|
|
||||||
|
// Choose space.
|
||||||
|
// If the source view's template arg1 or arg2 is a space then use it,
|
||||||
|
// otherwise use the source view's execution space.
|
||||||
|
|
||||||
|
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
|
||||||
|
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
|
||||||
|
>::type >::type OutputSpace ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
// If keeping the layout then match non-data type arguments
|
||||||
|
// else keep execution space and memory traits.
|
||||||
|
typedef typename
|
||||||
|
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
|
||||||
|
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
||||||
|
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
|
||||||
|
, typename SrcViewType::memory_traits >
|
||||||
|
>::type type ;
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace Impl */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , void , void , void
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , void , void , void
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 , class ArgType1 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , void , void
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 ,
|
||||||
|
const ArgType1 & arg1 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , void , void
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0,arg1);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0,arg1);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 , class ArgType1 , class ArgType2 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , void
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 ,
|
||||||
|
const ArgType1 & arg1 ,
|
||||||
|
const ArgType2 & arg2 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , void
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 ,
|
||||||
|
const ArgType1 & arg1 ,
|
||||||
|
const ArgType2 & arg2 ,
|
||||||
|
const ArgType3 & arg3 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, void , void , void , void
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||||
|
class ArgType4 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , void , void , void
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 ,
|
||||||
|
const ArgType1 & arg1 ,
|
||||||
|
const ArgType2 & arg2 ,
|
||||||
|
const ArgType3 & arg3 ,
|
||||||
|
const ArgType4 & arg4 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , void , void ,void
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||||
|
class ArgType4 , class ArgType5 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , ArgType5 , void , void
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 ,
|
||||||
|
const ArgType1 & arg1 ,
|
||||||
|
const ArgType2 & arg2 ,
|
||||||
|
const ArgType3 & arg3 ,
|
||||||
|
const ArgType4 & arg4 ,
|
||||||
|
const ArgType5 & arg5 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , ArgType5 , void , void
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||||
|
class ArgType4 , class ArgType5 , class ArgType6 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , ArgType5 , ArgType6 , void
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 ,
|
||||||
|
const ArgType1 & arg1 ,
|
||||||
|
const ArgType2 & arg2 ,
|
||||||
|
const ArgType3 & arg3 ,
|
||||||
|
const ArgType4 & arg4 ,
|
||||||
|
const ArgType5 & arg5 ,
|
||||||
|
const ArgType6 & arg6 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , ArgType5 , ArgType6 , void
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class D , class A1 , class A2 , class A3 ,
|
||||||
|
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||||
|
class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
|
||||||
|
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
||||||
|
>::type
|
||||||
|
subview( const DualView<D,A1,A2,A3> & src ,
|
||||||
|
const ArgType0 & arg0 ,
|
||||||
|
const ArgType1 & arg1 ,
|
||||||
|
const ArgType2 & arg2 ,
|
||||||
|
const ArgType3 & arg3 ,
|
||||||
|
const ArgType4 & arg4 ,
|
||||||
|
const ArgType5 & arg5 ,
|
||||||
|
const ArgType6 & arg6 ,
|
||||||
|
const ArgType7 & arg7 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||||
|
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||||
|
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
||||||
|
>::type
|
||||||
|
DstViewType ;
|
||||||
|
DstViewType sub_view;
|
||||||
|
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
||||||
|
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
||||||
|
sub_view.modified_device = src.modified_device;
|
||||||
|
sub_view.modified_host = src.modified_host;
|
||||||
|
return sub_view;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Partial specialization of Kokkos::deep_copy() for DualView objects.
|
||||||
|
//
|
||||||
|
|
||||||
|
template< class DT , class DL , class DD , class DM ,
|
||||||
|
class ST , class SL , class SD , class SM >
|
||||||
|
void
|
||||||
|
deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
|
||||||
|
const DualView<ST,SL,SD,SM>& src )
|
||||||
|
{
|
||||||
|
if (src.modified_device () >= src.modified_host ()) {
|
||||||
|
deep_copy (dst.d_view, src.d_view);
|
||||||
|
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
|
||||||
|
} else {
|
||||||
|
deep_copy (dst.h_view, src.h_view);
|
||||||
|
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif
|
||||||
173
lib/kokkos/containers/src/Kokkos_Functional.hpp
Executable file
173
lib/kokkos/containers/src/Kokkos_Functional.hpp
Executable file
@ -0,0 +1,173 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_FUNCTIONAL_HPP
|
||||||
|
#define KOKKOS_FUNCTIONAL_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
#include <impl/Kokkos_Functional_impl.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
// These should work for most types
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct pod_hash
|
||||||
|
{
|
||||||
|
typedef T argument_type;
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef uint32_t second_argument_type;
|
||||||
|
typedef uint32_t result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
uint32_t operator()(T const & t) const
|
||||||
|
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), 0); }
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
uint32_t operator()(T const & t, uint32_t seed) const
|
||||||
|
{ return Impl::MurmurHash3_x86_32( &t, sizeof(T), seed); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct pod_equal_to
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return Impl::bitwise_equal(&a,&b); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct pod_not_equal_to
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return !Impl::bitwise_equal(&a,&b); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct equal_to
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return a == b; }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct not_equal_to
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return a != b; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct greater
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return a > b; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct less
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return a < b; }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct greater_equal
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return a >= b; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct less_equal
|
||||||
|
{
|
||||||
|
typedef T first_argument_type;
|
||||||
|
typedef T second_argument_type;
|
||||||
|
typedef bool result_type;
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator()(T const & a, T const & b) const
|
||||||
|
{ return a <= b; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
#endif //KOKKOS_FUNCTIONAL_HPP
|
||||||
|
|
||||||
|
|
||||||
531
lib/kokkos/containers/src/Kokkos_SegmentedView.hpp
Executable file
531
lib/kokkos/containers/src/Kokkos_SegmentedView.hpp
Executable file
@ -0,0 +1,531 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
|
||||||
|
#define KOKKOS_SEGMENTED_VIEW_HPP_
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
|
||||||
|
struct delete_segmented_view;
|
||||||
|
|
||||||
|
/// Primary template: a no-op for any memory space that has no explicit
/// specialization.  The requested size is ignored.
template<class MemorySpace>
inline void DeviceSetAllocatableMemorySize(size_t /* size */)
{
}
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline
|
||||||
|
void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
size_t size_limit;
|
||||||
|
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||||
|
if(size_limit<size)
|
||||||
|
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
|
||||||
|
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
inline
|
||||||
|
void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
size_t size_limit;
|
||||||
|
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||||
|
if(size_limit<size)
|
||||||
|
cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
|
||||||
|
cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class DataType ,
|
||||||
|
class Arg1Type = void ,
|
||||||
|
class Arg2Type = void ,
|
||||||
|
class Arg3Type = void>
|
||||||
|
class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//! \name Typedefs for device types and various Kokkos::View specializations.
|
||||||
|
//@{
|
||||||
|
typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
|
||||||
|
|
||||||
|
//! The type of a Kokkos::View on the device.
|
||||||
|
typedef Kokkos::View< typename traits::data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::memory_space ,
|
||||||
|
Kokkos::MemoryUnmanaged > t_dev ;
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
Kokkos::View<t_dev*,typename traits::memory_space> segments_;
|
||||||
|
|
||||||
|
Kokkos::View<int,typename traits::memory_space> realloc_lock;
|
||||||
|
Kokkos::View<int,typename traits::memory_space> nsegments_;
|
||||||
|
|
||||||
|
size_t segment_length_;
|
||||||
|
size_t segment_length_m1_;
|
||||||
|
int max_segments_;
|
||||||
|
|
||||||
|
int segment_length_log2;
|
||||||
|
|
||||||
|
// Dimensions, cardinality, capacity, and offset computation for
|
||||||
|
// multidimensional array view of contiguous memory.
|
||||||
|
// Inherits from Impl::Shape
|
||||||
|
typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
|
||||||
|
, typename traits::array_layout
|
||||||
|
> offset_map_type ;
|
||||||
|
|
||||||
|
offset_map_type m_offset_map ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< typename traits::array_intrinsic_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::memory_space ,
|
||||||
|
typename traits::memory_traits > array_type ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< typename traits::const_data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::memory_space ,
|
||||||
|
typename traits::memory_traits > const_type ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< typename traits::non_const_data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
typename traits::memory_space ,
|
||||||
|
typename traits::memory_traits > non_const_type ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< typename traits::non_const_data_type ,
|
||||||
|
typename traits::array_layout ,
|
||||||
|
HostSpace ,
|
||||||
|
void > HostMirror ;
|
||||||
|
|
||||||
|
template< bool Accessible >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
|
||||||
|
dimension_0_intern() const { return nsegments_() * segment_length_ ; }
|
||||||
|
|
||||||
|
template< bool Accessible >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
|
||||||
|
dimension_0_intern() const
|
||||||
|
{
|
||||||
|
// In Host space
|
||||||
|
int n = 0 ;
|
||||||
|
#if ! defined( __CUDA_ARCH__ )
|
||||||
|
Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return n * segment_length_ ;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum { Rank = traits::rank };
|
||||||
|
|
||||||
|
/* \brief return a copy of the offset map (m_offset_map) */
KOKKOS_INLINE_FUNCTION
offset_map_type shape() const
{
  return m_offset_map ;
}
|
||||||
|
|
||||||
|
/* \brief return (current) size of dimension 0 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
|
||||||
|
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||||
|
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
|
||||||
|
int n = SegmentedView::dimension_0_intern< Accessible >();
|
||||||
|
return n ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* \brief return size of dimension 1 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
|
||||||
|
/* \brief return size of dimension 2 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
|
||||||
|
/* \brief return size of dimension 3 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
|
||||||
|
/* \brief return size of dimension 4 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
|
||||||
|
/* \brief return size of dimension 5 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
|
||||||
|
/* \brief return size of dimension 6 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
|
||||||
|
/* \brief return size of dimension 7 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
|
||||||
|
|
||||||
|
/* \brief return size of dimension 2 */
|
||||||
|
KOKKOS_INLINE_FUNCTION typename traits::size_type size() const {
|
||||||
|
return dimension_0() *
|
||||||
|
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
|
||||||
|
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
typename traits::size_type dimension( const iType & i ) const {
|
||||||
|
if(i==0)
|
||||||
|
return dimension_0();
|
||||||
|
else
|
||||||
|
return Kokkos::Impl::dimension( m_offset_map , i );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* \brief return segments_.dimension_0() multiplied by the N1..N7 extents
 *
 * NOTE(review): segment_length_ is not part of this product - confirm
 * whether the result is meant to count segments or dimension-0 entries.
 */
KOKKOS_INLINE_FUNCTION
typename traits::size_type capacity()
{
  return segments_.dimension_0()
       * m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3
       * m_offset_map.N4 * m_offset_map.N5 * m_offset_map.N6
       * m_offset_map.N7;
}
|
||||||
|
|
||||||
|
/* \brief return the current number of segments
 *
 * Computed as the current size of dimension 0 divided by the segment length.
 */
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_num_segments() {
  enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
      Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
  // Keep the value in size_type; it used to be stored in an int first,
  // which narrowed large values before the division.
  const typename traits::size_type n = SegmentedView::dimension_0_intern< Accessible >();
  return n/segment_length_ ;
}
|
||||||
|
|
||||||
|
/* \brief return max_segments_, the segment limit computed in the constructor */
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_max_segments()
{
  return max_segments_;
}
|
||||||
|
|
||||||
|
/// \brief Constructor that allocates View objects with an initial length of 0.
|
||||||
|
///
|
||||||
|
/// This constructor works mostly like the analogous constructor of View.
|
||||||
|
/// The first argument is a string label, which is entirely for your
|
||||||
|
/// benefit. (Different SegmentedView objects may have the same label if
|
||||||
|
/// you like.) The second argument 'view_length' is the size of the segments.
|
||||||
|
/// This number must be a power of two. The third argument n0 is the maximum
|
||||||
|
/// value for the first dimension of the segmented view. The maximal allocatable
|
||||||
|
/// number of Segments is thus: (n0+view_length-1)/view_length.
|
||||||
|
/// The arguments that follow are the other dimensions of the (1-7) of the
|
||||||
|
/// View objects. For example, for a View with 3 runtime dimensions,
|
||||||
|
/// the first 4 integer arguments will be nonzero:
|
||||||
|
/// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView
|
||||||
|
/// with a maximum of 306 segments of dimension (32768,8,4). The logical size of
|
||||||
|
/// the segmented view is (n,8,4) with n between 0 and 10000000.
|
||||||
|
/// You may omit the integer arguments that follow.
|
||||||
|
template< class LabelType >
|
||||||
|
SegmentedView(const LabelType & label ,
|
||||||
|
const size_t view_length ,
|
||||||
|
const size_t n0 ,
|
||||||
|
const size_t n1 = 0 ,
|
||||||
|
const size_t n2 = 0 ,
|
||||||
|
const size_t n3 = 0 ,
|
||||||
|
const size_t n4 = 0 ,
|
||||||
|
const size_t n5 = 0 ,
|
||||||
|
const size_t n6 = 0 ,
|
||||||
|
const size_t n7 = 0
|
||||||
|
): segment_length_(view_length),segment_length_m1_(view_length-1)
|
||||||
|
{
|
||||||
|
segment_length_log2 = -1;
|
||||||
|
size_t l = segment_length_;
|
||||||
|
while(l>0) {
|
||||||
|
l>>=1;
|
||||||
|
segment_length_log2++;
|
||||||
|
}
|
||||||
|
l = 1<<segment_length_log2;
|
||||||
|
if(l!=segment_length_)
|
||||||
|
Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
|
||||||
|
|
||||||
|
max_segments_ = (n0+segment_length_m1_)/segment_length_;
|
||||||
|
|
||||||
|
Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
|
||||||
|
|
||||||
|
segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
|
||||||
|
realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
|
||||||
|
nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
|
||||||
|
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copy constructor: member-wise copy of \c src, i.e. the segment table,
/// lock and counter views together with the cached length/shift values and
/// the offset map.
KOKKOS_INLINE_FUNCTION
SegmentedView(const SegmentedView& src)
  : segments_           ( src.segments_ )
  , realloc_lock        ( src.realloc_lock )
  , nsegments_          ( src.nsegments_ )
  , segment_length_     ( src.segment_length_ )
  , segment_length_m1_  ( src.segment_length_m1_ )
  , max_segments_       ( src.max_segments_ )
  , segment_length_log2 ( src.segment_length_log2 )
  , m_offset_map        ( src.m_offset_map )
{
}
|
||||||
|
|
||||||
|
/// Copy assignment: member-wise assignment from \c src; returns *this.
KOKKOS_INLINE_FUNCTION
SegmentedView& operator= (const SegmentedView& src) {
  // View members (segment table, lock word, segment counter).
  segments_    = src.segments_;
  realloc_lock = src.realloc_lock;
  nsegments_   = src.nsegments_;

  // Cached segment geometry.
  segment_length_     = src.segment_length_;
  segment_length_m1_  = src.segment_length_m1_;
  max_segments_       = src.max_segments_;
  segment_length_log2 = src.segment_length_log2;

  m_offset_map = src.m_offset_map;

  return *this;
}
|
||||||
|
|
||||||
|
/// Destructor.  When the segment table is reference counted and this object
/// holds the only remaining reference, one delete_segmented_view call is
/// launched per allocated segment (count read from \c nsegments_).
~SegmentedView() {
  // Nothing to do when reference counting is switched off for the table.
  if ( !segments_.tracker().ref_counting() ) {
    return;
  }

  // Only the holder of the last reference runs the cleanup below.
  const size_t refs = segments_.tracker().ref_count();
  if ( refs != 1u ) {
    return;
  }

  Kokkos::fence();

  // Fetch the number of allocated segments into a host mirror.
  typedef typename Kokkos::View<int,typename traits::execution_space>::HostMirror host_count_type;
  host_count_type h_nviews("h_nviews");
  Kokkos::deep_copy(h_nviews,nsegments_);

  Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
}
|
||||||
|
|
||||||
|
/// Returns, by value, the segment view stored at position \c i of the
/// segment table.  No bounds check is performed here.
KOKKOS_INLINE_FUNCTION
t_dev get_segment(const int& i) const {
  const t_dev & segment = segments_[i];
  return segment;
}
|
||||||
|
|
||||||
|
template< class MemberType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void grow (MemberType& team_member, const size_t& growSize) const {
|
||||||
|
if (growSize>max_segments_*segment_length_) {
|
||||||
|
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(team_member.team_rank()==0) {
|
||||||
|
bool too_small = growSize > segment_length_ * nsegments_();
|
||||||
|
if (too_small) {
|
||||||
|
while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
|
||||||
|
; // get the lock
|
||||||
|
too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
|
||||||
|
if(too_small) {
|
||||||
|
while(too_small) {
|
||||||
|
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
|
||||||
|
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
|
||||||
|
typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
|
||||||
|
|
||||||
|
segments_(nsegments_()) =
|
||||||
|
t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
|
||||||
|
nsegments_()++;
|
||||||
|
too_small = growSize > segment_length_ * nsegments_();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
realloc_lock() = 0; //release the lock
|
||||||
|
}
|
||||||
|
}
|
||||||
|
team_member.team_barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Grow the view until it covers \c growSize entries, without locking.
///
/// \param growSize  Requested extent of the first dimension.
///
/// Unlike grow(), this neither takes \c realloc_lock nor synchronizes a team.
/// If \c growSize exceeds max_segments_*segment_length_ a message is printed
/// and nothing is allocated.  Otherwise segments are allocated with \c new[]
/// until segment_length_*nsegments_() >= growSize.
KOKKOS_INLINE_FUNCTION
void grow_non_thread_safe (const size_t& growSize) const {
  if (growSize>max_segments_*segment_length_) {
    // The values are cast to match "%lu"; passing size_t directly is
    // undefined on platforms where size_t is not unsigned long.
    printf ("Exceeding maxSize: %lu %lu\n",
            static_cast<unsigned long>(growSize),
            static_cast<unsigned long>(max_segments_*segment_length_));
    return;
  }

  // Number of elements in one segment (loop invariant).
  const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
                            m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;

  // Append segments until the requested size is covered.
  while (growSize > segment_length_ * nsegments_()) {
    typename traits::non_const_value_type* const ptr =
      new typename traits::non_const_value_type[alloc_size];

    segments_(nsegments_()) =
      t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
             m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
             m_offset_map.N6, m_offset_map.N7);
    nsegments_()++;
  }
}
|
||||||
|
|
||||||
|
template< typename iType0 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType0 , typename iType1 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||||
|
std::is_integral<iType1>::value &&
|
||||||
|
traits::rank == 2 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 , const iType1 & i1 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType0 , typename iType1 , typename iType2 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||||
|
std::is_integral<iType1>::value &&
|
||||||
|
std::is_integral<iType2>::value &&
|
||||||
|
traits::rank == 3 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||||
|
std::is_integral<iType1>::value &&
|
||||||
|
std::is_integral<iType2>::value &&
|
||||||
|
std::is_integral<iType3>::value &&
|
||||||
|
traits::rank == 4 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||||
|
typename iType4 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||||
|
std::is_integral<iType1>::value &&
|
||||||
|
std::is_integral<iType2>::value &&
|
||||||
|
std::is_integral<iType3>::value &&
|
||||||
|
std::is_integral<iType4>::value &&
|
||||||
|
traits::rank == 5 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||||
|
const iType4 & i4 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||||
|
typename iType4 , typename iType5 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||||
|
std::is_integral<iType1>::value &&
|
||||||
|
std::is_integral<iType2>::value &&
|
||||||
|
std::is_integral<iType3>::value &&
|
||||||
|
std::is_integral<iType4>::value &&
|
||||||
|
std::is_integral<iType5>::value &&
|
||||||
|
traits::rank == 6 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||||
|
const iType4 & i4 , const iType5 & i5 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||||
|
typename iType4 , typename iType5 , typename iType6 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||||
|
std::is_integral<iType1>::value &&
|
||||||
|
std::is_integral<iType2>::value &&
|
||||||
|
std::is_integral<iType3>::value &&
|
||||||
|
std::is_integral<iType4>::value &&
|
||||||
|
std::is_integral<iType5>::value &&
|
||||||
|
std::is_integral<iType6>::value &&
|
||||||
|
traits::rank == 7 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||||
|
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||||
|
typename iType4 , typename iType5 , typename iType6 , typename iType7 >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||||
|
std::is_integral<iType1>::value &&
|
||||||
|
std::is_integral<iType2>::value &&
|
||||||
|
std::is_integral<iType3>::value &&
|
||||||
|
std::is_integral<iType4>::value &&
|
||||||
|
std::is_integral<iType5>::value &&
|
||||||
|
std::is_integral<iType6>::value &&
|
||||||
|
std::is_integral<iType7>::value &&
|
||||||
|
traits::rank == 8 )
|
||||||
|
, typename traits::value_type &
|
||||||
|
>::type
|
||||||
|
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||||
|
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
|
||||||
|
{
|
||||||
|
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
|
||||||
|
struct delete_segmented_view {
|
||||||
|
typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
|
||||||
|
typedef typename view_type::execution_space execution_space;
|
||||||
|
|
||||||
|
view_type view_;
|
||||||
|
delete_segmented_view(view_type view):view_(view) {
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (int i) const {
|
||||||
|
delete [] view_.get_segment(i).ptr_on_device();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
226
lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
Executable file
226
lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
Executable file
@ -0,0 +1,226 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_STATICCRSGRAPH_HPP
|
||||||
|
#define KOKKOS_STATICCRSGRAPH_HPP
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/// \class StaticCrsGraph
|
||||||
|
/// \brief Compressed row storage array.
|
||||||
|
///
|
||||||
|
/// \tparam DataType The type of stored entries. If a StaticCrsGraph is
|
||||||
|
/// used as the graph of a sparse matrix, then this is usually an
|
||||||
|
/// integer type, the type of the column indices in the sparse
|
||||||
|
/// matrix.
|
||||||
|
///
|
||||||
|
/// \tparam Arg1Type The second template parameter, corresponding
|
||||||
|
/// either to the Device type (if there are no more template
|
||||||
|
/// parameters) or to the Layout type (if there is at least one more
|
||||||
|
/// template parameter).
|
||||||
|
///
|
||||||
|
/// \tparam Arg2Type The third template parameter, which if provided
|
||||||
|
/// corresponds to the Device type.
|
||||||
|
///
|
||||||
|
/// \tparam SizeType The type of row offsets. Usually the default
|
||||||
|
/// parameter suffices. However, setting a nondefault value is
|
||||||
|
/// necessary in some cases, for example, if you want to have a
|
||||||
|
/// sparse matrix with dimensions (and therefore column indices)
|
||||||
|
/// that fit in \c int, but want to store more than <tt>INT_MAX</tt>
|
||||||
|
/// entries in the sparse matrix.
|
||||||
|
///
|
||||||
|
/// A row has a range of entries:
|
||||||
|
/// <ul>
|
||||||
|
/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
|
||||||
|
/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
|
||||||
|
/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li>
|
||||||
|
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
|
||||||
|
/// </ul>
|
||||||
|
template< class DataType,
|
||||||
|
class Arg1Type,
|
||||||
|
class Arg2Type = void,
|
||||||
|
typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type, void >::size_type>
|
||||||
|
class StaticCrsGraph {
|
||||||
|
private:
|
||||||
|
typedef ViewTraits<DataType*, Arg1Type, Arg2Type, void> traits;
|
||||||
|
|
||||||
|
public:
|
||||||
|
typedef DataType data_type;
|
||||||
|
typedef typename traits::array_layout array_layout;
|
||||||
|
typedef typename traits::execution_space execution_space;
|
||||||
|
typedef typename traits::device_type device_type;
|
||||||
|
typedef SizeType size_type;
|
||||||
|
|
||||||
|
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type;
|
||||||
|
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
|
||||||
|
typedef View< const size_type* , array_layout, device_type > row_map_type;
|
||||||
|
typedef View< DataType* , array_layout, device_type > entries_type;
|
||||||
|
|
||||||
|
entries_type entries;
|
||||||
|
row_map_type row_map;
|
||||||
|
|
||||||
|
//! Construct an empty view.
|
||||||
|
StaticCrsGraph () : entries(), row_map() {}
|
||||||
|
|
||||||
|
//! Copy constructor (shallow copy).
|
||||||
|
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template<class EntriesType, class RowMapType>
|
||||||
|
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/** \brief Assign to a view of the rhs array.
|
||||||
|
* If the old view is the last view
|
||||||
|
* then allocated memory is deallocated.
|
||||||
|
*/
|
||||||
|
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
|
||||||
|
entries = rhs.entries;
|
||||||
|
row_map = rhs.row_map;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Destroy this view of the array.
|
||||||
|
* If the last view then allocated memory is deallocated.
|
||||||
|
*/
|
||||||
|
~StaticCrsGraph() {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
size_type numRows() const {
|
||||||
|
return (row_map.dimension_0 () != 0) ?
|
||||||
|
row_map.dimension_0 () - static_cast<size_type> (1) :
|
||||||
|
static_cast<size_type> (0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class StaticCrsGraphType , class InputSizeType >
|
||||||
|
typename StaticCrsGraphType::staticcrsgraph_type
|
||||||
|
create_staticcrsgraph( const std::string & label ,
|
||||||
|
const std::vector< InputSizeType > & input );
|
||||||
|
|
||||||
|
template< class StaticCrsGraphType , class InputSizeType >
|
||||||
|
typename StaticCrsGraphType::staticcrsgraph_type
|
||||||
|
create_staticcrsgraph( const std::string & label ,
|
||||||
|
const std::vector< std::vector< InputSizeType > > & input );
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class DataType ,
|
||||||
|
class Arg1Type ,
|
||||||
|
class Arg2Type ,
|
||||||
|
typename SizeType >
|
||||||
|
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||||
|
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
||||||
|
|
||||||
|
template< class DataType ,
|
||||||
|
class Arg1Type ,
|
||||||
|
class Arg2Type ,
|
||||||
|
typename SizeType >
|
||||||
|
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||||
|
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class GraphType >
|
||||||
|
struct StaticCrsGraphMaximumEntry {
|
||||||
|
|
||||||
|
typedef typename GraphType::execution_space execution_space ;
|
||||||
|
typedef typename GraphType::data_type value_type ;
|
||||||
|
|
||||||
|
const typename GraphType::entries_type entries ;
|
||||||
|
|
||||||
|
StaticCrsGraphMaximumEntry( const GraphType & graph ) : entries( graph.entries ) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( const unsigned i , value_type & update ) const
|
||||||
|
{ if ( update < entries(i) ) update = entries(i); }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void init( value_type & update ) const
|
||||||
|
{ update = 0 ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void join( volatile value_type & update ,
|
||||||
|
volatile const value_type & input ) const
|
||||||
|
{ if ( update < input ) update = input ; }
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
|
||||||
|
DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
|
||||||
|
{
|
||||||
|
typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
|
||||||
|
typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
|
||||||
|
|
||||||
|
DataType result = 0 ;
|
||||||
|
Kokkos::parallel_reduce( graph.entries.dimension_0(),
|
||||||
|
FunctorType(graph), result );
|
||||||
|
return result ;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_STATICCRSGRAPH_HPP */
|
||||||
|
|
||||||
848
lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
Executable file
848
lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp
Executable file
@ -0,0 +1,848 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// \file Kokkos_UnorderedMap.hpp
|
||||||
|
/// \brief Declaration and definition of Kokkos::UnorderedMap.
|
||||||
|
///
|
||||||
|
/// This header file declares and defines Kokkos::UnorderedMap and its
|
||||||
|
/// related nonmember functions.
|
||||||
|
|
||||||
|
#ifndef KOKKOS_UNORDERED_MAP_HPP
|
||||||
|
#define KOKKOS_UNORDERED_MAP_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_Functional.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_Bitset.hpp>
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_UnorderedMap_impl.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
enum { UnorderedMapInvalidIndex = ~0u };
|
||||||
|
|
||||||
|
/// \brief First element of the return value of UnorderedMap::insert().
|
||||||
|
///
|
||||||
|
/// Inserting an element into an UnorderedMap is not guaranteed to
|
||||||
|
/// succeed. There are three possible conditions:
|
||||||
|
/// <ol>
|
||||||
|
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
|
||||||
|
/// means that the UnorderedMap ran out of space. </li>
|
||||||
|
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
|
||||||
|
/// did <i>not</i> exist in the table before. </li>
|
||||||
|
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
|
||||||
|
/// <i>did</i> exist in the table before. The new value was
|
||||||
|
/// ignored and the old value was left in place. </li>
|
||||||
|
/// </ol>
|
||||||
|
|
||||||
|
class UnorderedMapInsertResult
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
enum Status{
|
||||||
|
SUCCESS = 1u << 31
|
||||||
|
, EXISTING = 1u << 30
|
||||||
|
, FREED_EXISTING = 1u << 29
|
||||||
|
, LIST_LENGTH_MASK = ~(SUCCESS | EXISTING | FREED_EXISTING)
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Did the map successful insert the key/value pair
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool success() const { return (m_status & SUCCESS); }
|
||||||
|
|
||||||
|
/// Was the key already present in the map
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool existing() const { return (m_status & EXISTING); }
|
||||||
|
|
||||||
|
/// Did the map fail to insert the key due to insufficent capacity
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool failed() const { return m_index == UnorderedMapInvalidIndex; }
|
||||||
|
|
||||||
|
/// Did the map lose a race condition to insert a dupulicate key/value pair
|
||||||
|
/// where an index was claimed that needed to be released
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool freed_existing() const { return (m_status & FREED_EXISTING); }
|
||||||
|
|
||||||
|
/// How many iterations through the insert loop did it take before the
|
||||||
|
/// map returned
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
uint32_t list_position() const { return (m_status & LIST_LENGTH_MASK); }
|
||||||
|
|
||||||
|
/// Index where the key can be found as long as the insert did not fail
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
uint32_t index() const { return m_index; }
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
UnorderedMapInsertResult()
|
||||||
|
: m_index(UnorderedMapInvalidIndex)
|
||||||
|
, m_status(0)
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
void increment_list_position()
|
||||||
|
{
|
||||||
|
m_status += (list_position() < LIST_LENGTH_MASK) ? 1u : 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
void set_existing(uint32_t i, bool arg_freed_existing)
|
||||||
|
{
|
||||||
|
m_index = i;
|
||||||
|
m_status = EXISTING | (arg_freed_existing ? FREED_EXISTING : 0u) | list_position();
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
void set_success(uint32_t i)
|
||||||
|
{
|
||||||
|
m_index = i;
|
||||||
|
m_status = SUCCESS | list_position();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
uint32_t m_index;
|
||||||
|
uint32_t m_status;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// \class UnorderedMap
|
||||||
|
/// \brief Thread-safe, performance-portable lookup table.
|
||||||
|
///
|
||||||
|
/// This class provides a lookup table. In terms of functionality,
|
||||||
|
/// this class compares to std::unordered_map (new in C++11).
|
||||||
|
/// "Unordered" means that keys are not stored in any particular
|
||||||
|
/// order, unlike (for example) std::map. "Thread-safe" means that
|
||||||
|
/// lookups, insertion, and deletion are safe to call by multiple
|
||||||
|
/// threads in parallel. "Performance-portable" means that parallel
|
||||||
|
/// performance of these operations is reasonable, on multiple
|
||||||
|
/// hardware platforms. Platforms on which performance has been
|
||||||
|
/// tested include conventional Intel x86 multicore processors, Intel
|
||||||
|
/// Xeon Phi ("MIC"), and NVIDIA GPUs.
|
||||||
|
///
|
||||||
|
/// Parallel performance portability entails design decisions that
|
||||||
|
/// might differ from one's expectation for a sequential interface.
|
||||||
|
/// This particularly affects insertion of single elements. In an
|
||||||
|
/// interface intended for sequential use, insertion might reallocate
|
||||||
|
/// memory if the original allocation did not suffice to hold the new
|
||||||
|
/// element. In this class, insertion does <i>not</i> reallocate
|
||||||
|
/// memory. This means that it might fail. insert() returns an enum
|
||||||
|
/// which indicates whether the insert failed. There are three
|
||||||
|
/// possible conditions:
|
||||||
|
/// <ol>
|
||||||
|
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
|
||||||
|
/// means that the UnorderedMap ran out of space. </li>
|
||||||
|
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
|
||||||
|
/// did <i>not</i> exist in the table before. </li>
|
||||||
|
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
|
||||||
|
/// <i>did</i> exist in the table before. The new value was
|
||||||
|
/// ignored and the old value was left in place. </li>
|
||||||
|
/// </ol>
|
||||||
|
///
|
||||||
|
/// \tparam Key Type of keys of the lookup table. If \c const, users
|
||||||
|
/// are not allowed to add or remove keys, though they are allowed
|
||||||
|
/// to change values. In that case, the implementation may make
|
||||||
|
/// optimizations specific to the <tt>Device</tt>. For example, if
|
||||||
|
/// <tt>Device</tt> is \c Cuda, it may use texture fetches to access
|
||||||
|
/// keys.
|
||||||
|
///
|
||||||
|
/// \tparam Value Type of values stored in the lookup table. You may use
|
||||||
|
/// \c void here, in which case the table will be a set of keys. If
|
||||||
|
/// \c const, users are not allowed to change entries.
|
||||||
|
/// In that case, the implementation may make
|
||||||
|
/// optimizations specific to the \c Device, such as using texture
|
||||||
|
/// fetches to access values.
|
||||||
|
///
|
||||||
|
/// \tparam Device The Kokkos Device type.
|
||||||
|
///
|
||||||
|
/// \tparam Hasher Definition of the hash function for instances of
|
||||||
|
/// <tt>Key</tt>. The default will calculate a bitwise hash.
|
||||||
|
///
|
||||||
|
/// \tparam EqualTo Definition of the equality function for instances of
|
||||||
|
/// <tt>Key</tt>. The default will do a bitwise equality comparison.
|
||||||
|
///
|
||||||
|
template < typename Key
|
||||||
|
, typename Value
|
||||||
|
, typename Device = Kokkos::DefaultExecutionSpace
|
||||||
|
, typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>
|
||||||
|
, typename EqualTo = pod_equal_to<typename Impl::remove_const<Key>::type>
|
||||||
|
>
|
||||||
|
class UnorderedMap
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
typedef typename ViewTraits<Key,Device,void,void>::host_mirror_space host_mirror_space ;
|
||||||
|
public:
|
||||||
|
//! \name Public types and constants
|
||||||
|
//@{
|
||||||
|
|
||||||
|
//key_types
|
||||||
|
typedef Key declared_key_type;
|
||||||
|
typedef typename Impl::remove_const<declared_key_type>::type key_type;
|
||||||
|
typedef typename Impl::add_const<key_type>::type const_key_type;
|
||||||
|
|
||||||
|
//value_types
|
||||||
|
typedef Value declared_value_type;
|
||||||
|
typedef typename Impl::remove_const<declared_value_type>::type value_type;
|
||||||
|
typedef typename Impl::add_const<value_type>::type const_value_type;
|
||||||
|
|
||||||
|
typedef Device execution_space;
|
||||||
|
typedef Hasher hasher_type;
|
||||||
|
typedef EqualTo equal_to_type;
|
||||||
|
typedef uint32_t size_type;
|
||||||
|
|
||||||
|
//map_types
|
||||||
|
typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type;
|
||||||
|
typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type;
|
||||||
|
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
|
||||||
|
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
|
||||||
|
|
||||||
|
static const bool is_set = Impl::is_same<void,value_type>::value;
|
||||||
|
static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
|
||||||
|
static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
|
||||||
|
|
||||||
|
static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
|
||||||
|
static const bool is_modifiable_map = has_const_key && !has_const_value;
|
||||||
|
static const bool is_const_map = has_const_key && has_const_value;
|
||||||
|
|
||||||
|
|
||||||
|
typedef UnorderedMapInsertResult insert_result;
|
||||||
|
|
||||||
|
typedef UnorderedMap<Key,Value,host_mirror_space,Hasher,EqualTo> HostMirror;
|
||||||
|
|
||||||
|
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum { invalid_index = ~static_cast<size_type>(0) };
|
||||||
|
|
||||||
|
typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type;
|
||||||
|
|
||||||
|
typedef typename Impl::if_c< is_insertable_map
|
||||||
|
, View< key_type *, execution_space>
|
||||||
|
, View< const key_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||||
|
>::type key_type_view;
|
||||||
|
|
||||||
|
typedef typename Impl::if_c< is_insertable_map || is_modifiable_map
|
||||||
|
, View< impl_value_type *, execution_space>
|
||||||
|
, View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||||
|
>::type value_type_view;
|
||||||
|
|
||||||
|
typedef typename Impl::if_c< is_insertable_map
|
||||||
|
, View< size_type *, execution_space>
|
||||||
|
, View< const size_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||||
|
>::type size_type_view;
|
||||||
|
|
||||||
|
typedef typename Impl::if_c< is_insertable_map
|
||||||
|
, Bitset< execution_space >
|
||||||
|
, ConstBitset< execution_space>
|
||||||
|
>::type bitset_type;
|
||||||
|
|
||||||
|
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
|
||||||
|
enum { num_scalars = 3 };
|
||||||
|
typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view;
|
||||||
|
|
||||||
|
public:
|
||||||
|
//! \name Public member functions
|
||||||
|
//@{
|
||||||
|
|
||||||
|
UnorderedMap()
|
||||||
|
: m_bounded_insert()
|
||||||
|
, m_hasher()
|
||||||
|
, m_equal_to()
|
||||||
|
, m_size()
|
||||||
|
, m_available_indexes()
|
||||||
|
, m_hash_lists()
|
||||||
|
, m_next_index()
|
||||||
|
, m_keys()
|
||||||
|
, m_values()
|
||||||
|
, m_scalars()
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Constructor
|
||||||
|
///
|
||||||
|
/// \param capacity_hint [in] Initial guess of how many unique keys will be inserted into the map
|
||||||
|
/// \param hash [in] Hasher function for \c Key instances. The
|
||||||
|
/// default value usually suffices.
|
||||||
|
UnorderedMap( size_type capacity_hint, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type() )
|
||||||
|
: m_bounded_insert(true)
|
||||||
|
, m_hasher(hasher)
|
||||||
|
, m_equal_to(equal_to)
|
||||||
|
, m_size()
|
||||||
|
, m_available_indexes(calculate_capacity(capacity_hint))
|
||||||
|
, m_hash_lists(ViewAllocateWithoutInitializing("UnorderedMap hash list"), Impl::find_hash_size(capacity()))
|
||||||
|
, m_next_index(ViewAllocateWithoutInitializing("UnorderedMap next index"), capacity()+1) // +1 so that the *_at functions can always return a valid reference
|
||||||
|
, m_keys("UnorderedMap keys",capacity()+1)
|
||||||
|
, m_values("UnorderedMap values",(is_set? 1 : capacity()+1))
|
||||||
|
, m_scalars("UnorderedMap scalars")
|
||||||
|
{
|
||||||
|
if (!is_insertable_map) {
|
||||||
|
throw std::runtime_error("Cannot construct a non-insertable (i.e. const key_type) unordered_map");
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::deep_copy(m_hash_lists, invalid_index);
|
||||||
|
Kokkos::deep_copy(m_next_index, invalid_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset_failed_insert_flag()
|
||||||
|
{
|
||||||
|
reset_flag(failed_insert_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Build a histogram_type object from this map.
histogram_type get_histogram()
{
  histogram_type histogram(*this);
  return histogram;
}
|
||||||
|
|
||||||
|
//! Clear all entries in the table.
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
m_bounded_insert = true;
|
||||||
|
|
||||||
|
if (capacity() == 0) return;
|
||||||
|
|
||||||
|
m_available_indexes.clear();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(m_hash_lists, invalid_index);
|
||||||
|
Kokkos::deep_copy(m_next_index, invalid_index);
|
||||||
|
{
|
||||||
|
const key_type tmp = key_type();
|
||||||
|
Kokkos::deep_copy(m_keys,tmp);
|
||||||
|
}
|
||||||
|
if (is_set){
|
||||||
|
const impl_value_type tmp = impl_value_type();
|
||||||
|
Kokkos::deep_copy(m_values,tmp);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Kokkos::deep_copy(m_scalars, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Change the capacity of the the map
|
||||||
|
///
|
||||||
|
/// If there are no failed inserts the current size of the map will
|
||||||
|
/// be used as a lower bound for the input capacity.
|
||||||
|
/// If the map is not empty and does not have failed inserts
|
||||||
|
/// and the capacity changes then the current data is copied
|
||||||
|
/// into the resized / rehashed map.
|
||||||
|
///
|
||||||
|
/// This is <i>not</i> a device function; it may <i>not</i> be
|
||||||
|
/// called in a parallel kernel.
|
||||||
|
bool rehash(size_type requested_capacity = 0)
|
||||||
|
{
|
||||||
|
const bool bounded_insert = (capacity() == 0) || (size() == 0u);
|
||||||
|
return rehash(requested_capacity, bounded_insert );
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Rebuild the map with a new capacity.
///
/// \param requested_capacity [in] Desired capacity hint; raised to size()
///   when smaller.
/// \param bounded_insert [in] Bounded-insert mode of the rebuilt map.
/// \return \c false (and no change) for non-insertable maps, \c true otherwise.
bool rehash(size_type requested_capacity, bool bounded_insert)
{
  if (!is_insertable_map) {
    return false;
  }

  const size_type live_entries = size();
  if (requested_capacity < live_entries) {
    requested_capacity = live_entries;
  }

  insertable_map_type rebuilt(requested_capacity, m_hasher, m_equal_to);

  if (live_entries) {
    // Re-inserting the existing entries runs with unbounded search attempts.
    rebuilt.m_bounded_insert = false;
    Impl::UnorderedMapRehash<insertable_map_type> copier(rebuilt,*this);
    copier.apply();
  }
  rebuilt.m_bounded_insert = bounded_insert;

  *this = rebuilt;

  return true;
}
|
||||||
|
|
||||||
|
/// \brief The number of entries in the table.
|
||||||
|
///
|
||||||
|
/// This method has undefined behavior when erasable() is true.
|
||||||
|
///
|
||||||
|
/// Note that this is not a device function; it cannot be called in
|
||||||
|
/// a parallel kernel. The value is not stored as a variable; it
|
||||||
|
/// must be computed.
|
||||||
|
size_type size() const
|
||||||
|
{
|
||||||
|
if( capacity() == 0u ) return 0u;
|
||||||
|
if (modified()) {
|
||||||
|
m_size = m_available_indexes.count();
|
||||||
|
reset_flag(modified_idx);
|
||||||
|
}
|
||||||
|
return m_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief The current number of failed insert() calls.
|
||||||
|
///
|
||||||
|
/// This is <i>not</i> a device function; it may <i>not</i> be
|
||||||
|
/// called in a parallel kernel. The value is not stored as a
|
||||||
|
/// variable; it must be computed.
|
||||||
|
bool failed_insert() const
|
||||||
|
{
|
||||||
|
return get_flag(failed_insert_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool erasable() const
|
||||||
|
{
|
||||||
|
return is_insertable_map ? get_flag(erasable_idx) : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool begin_erase()
|
||||||
|
{
|
||||||
|
bool result = !erasable();
|
||||||
|
if (is_insertable_map && result) {
|
||||||
|
execution_space::fence();
|
||||||
|
set_flag(erasable_idx);
|
||||||
|
execution_space::fence();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool end_erase()
|
||||||
|
{
|
||||||
|
bool result = erasable();
|
||||||
|
if (is_insertable_map && result) {
|
||||||
|
execution_space::fence();
|
||||||
|
Impl::UnorderedMapErase<declared_map_type> f(*this);
|
||||||
|
f.apply();
|
||||||
|
execution_space::fence();
|
||||||
|
reset_flag(erasable_idx);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief The maximum number of entries that the table can hold.
|
||||||
|
///
|
||||||
|
/// This <i>is</i> a device function; it may be called in a parallel
|
||||||
|
/// kernel.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
size_type capacity() const
|
||||||
|
{ return m_available_indexes.size(); }
|
||||||
|
|
||||||
|
/// \brief The number of hash table "buckets."
|
||||||
|
///
|
||||||
|
/// This is different than the number of entries that the table can
|
||||||
|
/// hold. Each key hashes to an index in [0, hash_capacity() - 1].
|
||||||
|
/// That index can hold zero or more entries. This class decides
|
||||||
|
/// what hash_capacity() should be, given the user's upper bound on
|
||||||
|
/// the number of entries the table must be able to hold.
|
||||||
|
///
|
||||||
|
/// This <i>is</i> a device function; it may be called in a parallel
|
||||||
|
/// kernel.
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
size_type hash_capacity() const
|
||||||
|
{ return m_hash_lists.dimension_0(); }
|
||||||
|
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
/// \brief Attempt to insert key \c k (with value \c v) into the map.
///
/// This <i>is</i> a device function; it may be called in a parallel
|
||||||
|
/// kernel. As discussed in the class documentation, it need not
|
||||||
|
/// succeed. The return value tells you if it did.
|
||||||
|
///
|
||||||
|
/// \param k [in] The key to attempt to insert.
|
||||||
|
/// \param v [in] The corresponding value to attempt to insert. If
|
||||||
|
/// using this class as a set (with Value = void), then you need not
|
||||||
|
/// provide this value.
|
||||||
|
///
/// \return An insert_result. It is returned untouched (as default
/// constructed) when the map is not insertable, has zero capacity, or the
/// erasable flag is set.
KOKKOS_INLINE_FUNCTION
|
||||||
|
insert_result insert(key_type const& k, impl_value_type const&v = impl_value_type()) const
|
||||||
|
{
|
||||||
|
insert_result result;
|
||||||
|
|
||||||
|
// Refuse to insert while an erase phase is open or when nothing can be stored.
if ( !is_insertable_map || capacity() == 0u || m_scalars((int)erasable_idx) ) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raise the "modified" flag (size() uses it to know its cached count is
// stale); the write is skipped when the flag is already set.
if ( !m_scalars((int)modified_idx) ) {
|
||||||
|
m_scalars((int)modified_idx) = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Volatile reference to the failed-insert flag -- presumably so that writes
// by other threads are observed on every read.
int volatile & failed_insert_ref = m_scalars((int)failed_insert_idx) ;
|
||||||
|
|
||||||
|
const size_type hash_value = m_hasher(k);
|
||||||
|
const size_type hash_list = hash_value % m_hash_lists.dimension_0();
|
||||||
|
|
||||||
|
// curr_ptr always points at the link that would have to be updated to
// append an entry: first the bucket head, later a m_next_index slot.
size_type * curr_ptr = & m_hash_lists[ hash_list ];
|
||||||
|
// Index of the entry this thread has claimed (invalid_index while none).
size_type new_index = invalid_index ;
|
||||||
|
|
||||||
|
// Scale the bucket number into the entry index range to get a starting
// hint for the free-slot search; the product is formed in double precision
// (presumably to avoid 32-bit overflow).
|
||||||
|
size_type index_hint = static_cast<size_type>( (static_cast<double>(hash_list) * capacity()) / m_hash_lists.dimension_0());
|
||||||
|
|
||||||
|
size_type find_attempts = 0;
|
||||||
|
|
||||||
|
// In bounded mode at most bounded_find_attempts unsuccessful searches are
// made (fewer if max_hint() is smaller); otherwise up to max_hint().
enum { bounded_find_attempts = 32u };
|
||||||
|
const size_type max_attempts = (m_bounded_insert && (bounded_find_attempts < m_available_indexes.max_hint()) ) ?
|
||||||
|
bounded_find_attempts :
|
||||||
|
m_available_indexes.max_hint();
|
||||||
|
|
||||||
|
bool not_done = true ;
|
||||||
|
|
||||||
|
#if defined( __MIC__ )
|
||||||
|
#pragma noprefetch
|
||||||
|
#endif
|
||||||
|
while ( not_done ) {
|
||||||
|
|
||||||
|
// Continue searching the unordered list for this key,
|
||||||
|
// list will only be appended during insert phase.
|
||||||
|
// Need volatile_load as other threads may be appending.
|
||||||
|
size_type curr = volatile_load(curr_ptr);
|
||||||
|
|
||||||
|
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||||
|
#if defined( __MIC__ )
|
||||||
|
#pragma noprefetch
|
||||||
|
#endif
|
||||||
|
// Walk the chain until the key is found or the end of the list is reached.
while ( curr != invalid_index && ! m_equal_to( volatile_load(&m_keys[curr]), k) ) {
|
||||||
|
result.increment_list_position();
|
||||||
|
index_hint = curr;
|
||||||
|
curr_ptr = &m_next_index[curr];
|
||||||
|
curr = volatile_load(curr_ptr);
|
||||||
|
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------
|
||||||
|
// If key already present then return that index.
|
||||||
|
if ( curr != invalid_index ) {
|
||||||
|
|
||||||
|
const bool free_existing = new_index != invalid_index;
|
||||||
|
if ( free_existing ) {
|
||||||
|
// Previously claimed an unused entry that was not inserted.
|
||||||
|
// Release this unused entry immediately.
|
||||||
|
if (!m_available_indexes.reset(new_index) ) {
|
||||||
|
printf("Unable to free existing\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
result.set_existing(curr, free_existing);
|
||||||
|
not_done = false ;
|
||||||
|
}
|
||||||
|
//------------------------------------------------------------
|
||||||
|
// Key is not currently in the map.
|
||||||
|
// If the thread has claimed an entry try to insert now.
|
||||||
|
else {
|
||||||
|
|
||||||
|
//------------------------------------------------------------
|
||||||
|
// If have not already claimed an unused entry then do so now.
|
||||||
|
if (new_index == invalid_index) {
|
||||||
|
|
||||||
|
bool found = false;
|
||||||
|
// use the hash_list as the flag for the search direction
|
||||||
|
Kokkos::tie(found, index_hint) = m_available_indexes.find_any_unset_near( index_hint, hash_list );
|
||||||
|
|
||||||
|
// Give up (and raise the failed-insert flag) once the search has come back
// empty max_attempts times; otherwise try to claim index_hint for this thread.
|
||||||
|
if ( !found && ++find_attempts >= max_attempts ) {
|
||||||
|
failed_insert_ref = true;
|
||||||
|
not_done = false ;
|
||||||
|
}
|
||||||
|
else if (m_available_indexes.set(index_hint) ) {
|
||||||
|
new_index = index_hint;
|
||||||
|
// Set key and value
|
||||||
|
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]);
|
||||||
|
m_keys[new_index] = k ;
|
||||||
|
|
||||||
|
if (!is_set) {
|
||||||
|
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]);
|
||||||
|
m_values[new_index] = v ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do not proceed until key and value are updated in global memory
|
||||||
|
memory_fence();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (failed_insert_ref) {
|
||||||
|
// An entry is already claimed but the failed-insert flag is raised: make
// the append attempt below the last one.
// NOTE(review): if that append loses the race, new_index appears to stay
// set in m_available_indexes without being linked into a list -- confirm.
not_done = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt to append claimed entry into the list.
|
||||||
|
// Another thread may also be trying to append the same list so protect with atomic.
|
||||||
|
if ( new_index != invalid_index &&
|
||||||
|
curr == atomic_compare_exchange(curr_ptr, static_cast<size_type>(invalid_index), new_index) ) {
|
||||||
|
// Succeeded in appending
|
||||||
|
result.set_success(new_index);
|
||||||
|
not_done = false ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // while ( not_done )
|
||||||
|
|
||||||
|
return result ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Release the entry holding key \c k, if any.
///
/// Only acts on insertable maps with non-zero capacity while the erasable
/// flag is set (see begin_erase()).  The entry's index is released in the
/// bitset; the hash lists are not touched here.
///
/// \return \c true when an entry for \c k was found and released.
KOKKOS_INLINE_FUNCTION
bool erase(key_type const& k) const
{
  if ( !is_insertable_map || !(0u < capacity()) || !m_scalars((int)erasable_idx) ) {
    return false;
  }

  // Raise the "modified" flag unless it is already set.
  if ( ! m_scalars((int)modified_idx) ) {
    m_scalars((int)modified_idx) = true;
  }

  const size_type slot = find(k);
  if (!valid_at(slot)) {
    return false;
  }

  m_available_indexes.reset(slot);
  return true;
}
|
||||||
|
|
||||||
|
/// \brief Find the given key \c k, if it exists in the table.
|
||||||
|
///
|
||||||
|
/// \return If the key exists in the table, the index of the
|
||||||
|
/// value corresponding to that key; otherwise, an invalid index.
|
||||||
|
///
|
||||||
|
/// This <i>is</i> a device function; it may be called in a parallel
|
||||||
|
/// kernel.
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
size_type find( const key_type & k) const
|
||||||
|
{
|
||||||
|
size_type curr = 0u < capacity() ? m_hash_lists( m_hasher(k) % m_hash_lists.dimension_0() ) : invalid_index ;
|
||||||
|
|
||||||
|
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||||
|
while (curr != invalid_index && !m_equal_to( m_keys[curr], k) ) {
|
||||||
|
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
|
||||||
|
curr = m_next_index[curr];
|
||||||
|
}
|
||||||
|
|
||||||
|
return curr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Does the key exist in the map
|
||||||
|
///
|
||||||
|
/// This <i>is</i> a device function; it may be called in a parallel
|
||||||
|
/// kernel.
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
bool exists( const key_type & k) const
|
||||||
|
{
|
||||||
|
return valid_at(find(k));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// \brief Get the value with \c i as its direct index.
|
||||||
|
///
|
||||||
|
/// \param i [in] Index directly into the array of entries.
|
||||||
|
///
|
||||||
|
/// This <i>is</i> a device function; it may be called in a parallel
|
||||||
|
/// kernel.
|
||||||
|
///
|
||||||
|
/// 'const value_type' via Cuda texture fetch must return by value.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
typename Impl::if_c< (is_set || has_const_value), impl_value_type, impl_value_type &>::type
|
||||||
|
value_at(size_type i) const
|
||||||
|
{
|
||||||
|
return m_values[ is_set ? 0 : (i < capacity() ? i : capacity()) ];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Get the key with \c i as its direct index.
|
||||||
|
///
|
||||||
|
/// \param i [in] Index directly into the array of entries.
|
||||||
|
///
|
||||||
|
/// This <i>is</i> a device function; it may be called in a parallel
|
||||||
|
/// kernel.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
key_type key_at(size_type i) const
|
||||||
|
{
|
||||||
|
return m_keys[ i < capacity() ? i : capacity() ];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Whether index \c i is currently marked as in use in the index bitset.
KOKKOS_FORCEINLINE_FUNCTION
bool valid_at(size_type i) const
{ return m_available_indexes.test(i); }
|
||||||
|
|
||||||
|
template <typename SKey, typename SValue>
|
||||||
|
UnorderedMap( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src,
|
||||||
|
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value,int>::type = 0
|
||||||
|
)
|
||||||
|
: m_bounded_insert(src.m_bounded_insert)
|
||||||
|
, m_hasher(src.m_hasher)
|
||||||
|
, m_equal_to(src.m_equal_to)
|
||||||
|
, m_size(src.m_size)
|
||||||
|
, m_available_indexes(src.m_available_indexes)
|
||||||
|
, m_hash_lists(src.m_hash_lists)
|
||||||
|
, m_next_index(src.m_next_index)
|
||||||
|
, m_keys(src.m_keys)
|
||||||
|
, m_values(src.m_values)
|
||||||
|
, m_scalars(src.m_scalars)
|
||||||
|
{}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename SKey, typename SValue>
|
||||||
|
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value
|
||||||
|
,declared_map_type & >::type
|
||||||
|
operator=( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src)
|
||||||
|
{
|
||||||
|
m_bounded_insert = src.m_bounded_insert;
|
||||||
|
m_hasher = src.m_hasher;
|
||||||
|
m_equal_to = src.m_equal_to;
|
||||||
|
m_size = src.m_size;
|
||||||
|
m_available_indexes = src.m_available_indexes;
|
||||||
|
m_hash_lists = src.m_hash_lists;
|
||||||
|
m_next_index = src.m_next_index;
|
||||||
|
m_keys = src.m_keys;
|
||||||
|
m_values = src.m_values;
|
||||||
|
m_scalars = src.m_scalars;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename SKey, typename SValue, typename SDevice>
|
||||||
|
typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
|
||||||
|
Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
|
||||||
|
>::type
|
||||||
|
create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
|
||||||
|
{
|
||||||
|
if (m_hash_lists.ptr_on_device() != src.m_hash_lists.ptr_on_device()) {
|
||||||
|
|
||||||
|
insertable_map_type tmp;
|
||||||
|
|
||||||
|
tmp.m_bounded_insert = src.m_bounded_insert;
|
||||||
|
tmp.m_hasher = src.m_hasher;
|
||||||
|
tmp.m_equal_to = src.m_equal_to;
|
||||||
|
tmp.m_size = src.size();
|
||||||
|
tmp.m_available_indexes = bitset_type( src.capacity() );
|
||||||
|
tmp.m_hash_lists = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap hash list"), src.m_hash_lists.dimension_0() );
|
||||||
|
tmp.m_next_index = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap next index"), src.m_next_index.dimension_0() );
|
||||||
|
tmp.m_keys = key_type_view( ViewAllocateWithoutInitializing("UnorderedMap keys"), src.m_keys.dimension_0() );
|
||||||
|
tmp.m_values = value_type_view( ViewAllocateWithoutInitializing("UnorderedMap values"), src.m_values.dimension_0() );
|
||||||
|
tmp.m_scalars = scalars_view("UnorderedMap scalars");
|
||||||
|
|
||||||
|
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
|
||||||
|
|
||||||
|
raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
|
||||||
|
raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
|
||||||
|
raw_deep_copy(tmp.m_keys.ptr_on_device(), src.m_keys.ptr_on_device(), sizeof(key_type)*src.m_keys.dimension_0());
|
||||||
|
if (!is_set) {
|
||||||
|
raw_deep_copy(tmp.m_values.ptr_on_device(), src.m_values.ptr_on_device(), sizeof(impl_value_type)*src.m_values.dimension_0());
|
||||||
|
}
|
||||||
|
raw_deep_copy(tmp.m_scalars.ptr_on_device(), src.m_scalars.ptr_on_device(), sizeof(int)*num_scalars );
|
||||||
|
|
||||||
|
*this = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//@}
|
||||||
|
private: // private member functions
|
||||||
|
|
||||||
|
bool modified() const
|
||||||
|
{
|
||||||
|
return get_flag(modified_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_flag(int flag) const
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||||
|
const int true_ = true;
|
||||||
|
raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset_flag(int flag) const
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||||
|
const int false_ = false;
|
||||||
|
raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool get_flag(int flag) const
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
|
||||||
|
int result = false;
|
||||||
|
raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Turn a capacity hint into the capacity actually allocated.
///
/// The hint is scaled by 7/6 (about 16.7% head room) and rounded up to the
/// next multiple of 128.  A hint of zero yields 128.  The arithmetic is done
/// in 64 bits and the result saturates at the largest multiple of 128 that
/// fits in 32 bits, so very large hints can no longer wrap around to a tiny
/// (or zero) capacity.
///
/// \param capacity_hint [in] Expected number of unique keys.
/// \return Capacity to allocate; always a non-zero multiple of 128.
static uint32_t calculate_capacity(uint32_t capacity_hint)
{
  if (capacity_hint == 0u) return 128u;

  // Largest multiple of 128 representable as uint32_t.
  const uint64_t max_capacity = 0xFFFFFF80ull;

  const uint64_t padded  = (7ull * capacity_hint) / 6u;
  const uint64_t rounded = ((padded + 127u) / 128u) * 128u;

  return static_cast<uint32_t>(rounded < max_capacity ? rounded : max_capacity);
}
|
||||||
|
|
||||||
|
private: // private members
|
||||||
|
bool m_bounded_insert;
|
||||||
|
hasher_type m_hasher;
|
||||||
|
equal_to_type m_equal_to;
|
||||||
|
mutable size_type m_size;
|
||||||
|
bitset_type m_available_indexes;
|
||||||
|
size_type_view m_hash_lists;
|
||||||
|
size_type_view m_next_index;
|
||||||
|
key_type_view m_keys;
|
||||||
|
value_type_view m_values;
|
||||||
|
scalars_view m_scalars;
|
||||||
|
|
||||||
|
template <typename KKey, typename VValue, typename DDevice, typename HHash, typename EEqualTo>
|
||||||
|
friend class UnorderedMap;
|
||||||
|
|
||||||
|
template <typename UMap>
|
||||||
|
friend struct Impl::UnorderedMapErase;
|
||||||
|
|
||||||
|
template <typename UMap>
|
||||||
|
friend struct Impl::UnorderedMapHistogram;
|
||||||
|
|
||||||
|
template <typename UMap>
|
||||||
|
friend struct Impl::UnorderedMapPrint;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Specialization of deep_copy for two UnorderedMap objects.
|
||||||
|
template < typename DKey, typename DT, typename DDevice
|
||||||
|
, typename SKey, typename ST, typename SDevice
|
||||||
|
, typename Hasher, typename EqualTo >
|
||||||
|
inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> & dst
|
||||||
|
, const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> & src )
|
||||||
|
{
|
||||||
|
dst.create_copy_view(src);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif //KOKKOS_UNORDERED_MAP_HPP
|
||||||
287
lib/kokkos/containers/src/Kokkos_Vector.hpp
Executable file
287
lib/kokkos/containers/src/Kokkos_Vector.hpp
Executable file
@ -0,0 +1,287 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_VECTOR_HPP
|
||||||
|
#define KOKKOS_VECTOR_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
#include <Kokkos_DualView.hpp>
|
||||||
|
|
||||||
|
/* Drop in replacement for std::vector based on Kokkos::DualView
|
||||||
|
* Most functions only work on the host (it will not compile if called from device kernel)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template <typename Scalar, class Space = Kokkos::DefaultExecutionSpace >
|
||||||
|
class vector : public DualView<Scalar*,LayoutLeft,Space> {
|
||||||
|
public:
|
||||||
|
typedef typename Space::memory_space memory_space;
|
||||||
|
typedef typename Space::execution_space execution_space;
|
||||||
|
typedef typename Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
typedef Scalar value_type;
|
||||||
|
typedef Scalar* pointer;
|
||||||
|
typedef const Scalar* const_pointer;
|
||||||
|
typedef Scalar* reference;
|
||||||
|
typedef const Scalar* const_reference;
|
||||||
|
typedef Scalar* iterator;
|
||||||
|
typedef const Scalar* const_iterator;
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t _size;
|
||||||
|
typedef size_t size_type;
|
||||||
|
float _extra_storage;
|
||||||
|
typedef DualView<Scalar*,LayoutLeft,Space> DV;
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
#ifdef KOKKOS_CUDA_USE_UVM
|
||||||
|
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
|
||||||
|
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
|
||||||
|
#else
|
||||||
|
inline Scalar& operator() (int i) const {return DV::h_view(i);};
|
||||||
|
inline Scalar& operator[] (int i) const {return DV::h_view(i);};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Member functions which behave like std::vector functions */
|
||||||
|
|
||||||
|
/// Default constructor: no elements, growth factor 1.1, and the host
/// modification counter set to 1.
vector()
  : DV()
  , _size(0)
  , _extra_storage(1.1)
{
  DV::modified_host() = 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Space>("Vector",size_t(n*(1.1))) {
|
||||||
|
_size = n;
|
||||||
|
_extra_storage = 1.1;
|
||||||
|
DV::modified_host() = 1;
|
||||||
|
|
||||||
|
assign(n,val);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void resize(size_t n) {
|
||||||
|
if(n>=capacity())
|
||||||
|
DV::resize(size_t (n*_extra_storage));
|
||||||
|
_size = n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void resize(size_t n, const Scalar& val) {
|
||||||
|
assign(n,val);
|
||||||
|
}
|
||||||
|
|
||||||
|
void assign (size_t n, const Scalar& val) {
|
||||||
|
|
||||||
|
/* Resize if necessary (behavour of std:vector) */
|
||||||
|
|
||||||
|
if(n>capacity())
|
||||||
|
DV::resize(size_t (n*_extra_storage));
|
||||||
|
_size = n;
|
||||||
|
|
||||||
|
/* Assign value either on host or on device */
|
||||||
|
|
||||||
|
if( DV::modified_host() >= DV::modified_device() ) {
|
||||||
|
set_functor_host f(DV::h_view,val);
|
||||||
|
parallel_for(n,f);
|
||||||
|
DV::t_host::execution_space::fence();
|
||||||
|
DV::modified_host()++;
|
||||||
|
} else {
|
||||||
|
set_functor f(DV::d_view,val);
|
||||||
|
parallel_for(n,f);
|
||||||
|
DV::t_dev::execution_space::fence();
|
||||||
|
DV::modified_device()++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void reserve(size_t n) {
|
||||||
|
DV::resize(size_t (n*_extra_storage));
|
||||||
|
}
|
||||||
|
|
||||||
|
void push_back(Scalar val) {
|
||||||
|
DV::modified_host()++;
|
||||||
|
if(_size == capacity()) {
|
||||||
|
size_t new_size = _size*_extra_storage;
|
||||||
|
if(new_size == _size) new_size++;
|
||||||
|
DV::resize(new_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
DV::h_view(_size) = val;
|
||||||
|
_size++;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
void pop_back() {
|
||||||
|
_size--;
|
||||||
|
};
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_type size() const {return _size;};
|
||||||
|
size_type max_size() const {return 2000000000;}
|
||||||
|
size_type capacity() const {return DV::capacity();};
|
||||||
|
bool empty() const {return _size==0;};
|
||||||
|
|
||||||
|
iterator begin() const {return &DV::h_view(0);};
|
||||||
|
|
||||||
|
iterator end() const {return &DV::h_view(_size);};
|
||||||
|
|
||||||
|
|
||||||
|
/* std::algorithms wich work originally with iterators, here they are implemented as member functions */
|
||||||
|
|
||||||
|
size_t
|
||||||
|
lower_bound (const size_t& start,
|
||||||
|
const size_t& theEnd,
|
||||||
|
const Scalar& comp_val) const
|
||||||
|
{
|
||||||
|
int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion
|
||||||
|
int upper = _size > theEnd? theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion
|
||||||
|
if (upper <= lower) {
|
||||||
|
return theEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
Scalar lower_val = DV::h_view(lower);
|
||||||
|
Scalar upper_val = DV::h_view(upper);
|
||||||
|
size_t idx = (upper+lower)/2;
|
||||||
|
Scalar val = DV::h_view(idx);
|
||||||
|
if(val>upper_val) return upper;
|
||||||
|
if(val<lower_val) return start;
|
||||||
|
|
||||||
|
while(upper>lower) {
|
||||||
|
if(comp_val>val) {
|
||||||
|
lower = ++idx;
|
||||||
|
} else {
|
||||||
|
upper = idx;
|
||||||
|
}
|
||||||
|
idx = (upper+lower)/2;
|
||||||
|
val = DV::h_view(idx);
|
||||||
|
}
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_sorted() {
|
||||||
|
for(int i=0;i<_size-1;i++) {
|
||||||
|
if(DV::h_view(i)>DV::h_view(i+1)) return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator find(Scalar val) const {
|
||||||
|
if(_size == 0) return end();
|
||||||
|
|
||||||
|
int upper,lower,current;
|
||||||
|
current = _size/2;
|
||||||
|
upper = _size-1;
|
||||||
|
lower = 0;
|
||||||
|
|
||||||
|
if((val<DV::h_view(0)) || (val>DV::h_view(_size-1)) ) return end();
|
||||||
|
|
||||||
|
while(upper>lower)
|
||||||
|
{
|
||||||
|
if(val>DV::h_view(current)) lower = current+1;
|
||||||
|
else upper = current;
|
||||||
|
current = (upper+lower)/2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(val==DV::h_view(current)) return &DV::h_view(current);
|
||||||
|
else return end();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Additional functions for data management */
|
||||||
|
|
||||||
|
void device_to_host(){
|
||||||
|
deep_copy(DV::h_view,DV::d_view);
|
||||||
|
}
|
||||||
|
void host_to_device() const {
|
||||||
|
deep_copy(DV::d_view,DV::h_view);
|
||||||
|
}
|
||||||
|
|
||||||
|
void on_host() {
|
||||||
|
DV::modified_host() = DV::modified_device() + 1;
|
||||||
|
}
|
||||||
|
void on_device() {
|
||||||
|
DV::modified_device() = DV::modified_host() + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_overallocation(float extra) {
|
||||||
|
_extra_storage = 1.0 + extra;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
struct set_functor {
|
||||||
|
typedef typename DV::t_dev::execution_space execution_space;
|
||||||
|
typename DV::t_dev _data;
|
||||||
|
Scalar _val;
|
||||||
|
|
||||||
|
set_functor(typename DV::t_dev data, Scalar val) :
|
||||||
|
_data(data),_val(val) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int &i) const {
|
||||||
|
_data(i) = _val;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct set_functor_host {
|
||||||
|
typedef typename DV::t_host::execution_space execution_space;
|
||||||
|
typename DV::t_host _data;
|
||||||
|
Scalar _val;
|
||||||
|
|
||||||
|
set_functor_host(typename DV::t_host data, Scalar val) :
|
||||||
|
_data(data),_val(val) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (const int &i) const {
|
||||||
|
_data(i) = _val;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
173
lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp
Executable file
173
lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp
Executable file
@ -0,0 +1,173 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_BITSET_IMPL_HPP
|
||||||
|
#define KOKKOS_BITSET_IMPL_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <climits>
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
/// Rotate the bits of i to the right by r positions.
/// The rotation count is reduced modulo the bit width of unsigned, so counts
/// of zero, of a full word or more, and negative counts are all well defined
/// (a negative count rotates left).
KOKKOS_FORCEINLINE_FUNCTION
unsigned rotate_right(unsigned i, int r)
{
  enum { size = static_cast<int>(sizeof(unsigned)*CHAR_BIT) };
  // Shifting by >= size (or by a negative amount) is undefined behaviour;
  // masking keeps the count in [0, size).  Relies on size being a power of two.
  r &= (size - 1);
  return r ? ((i >> r) | (i << (size-r))) : i ;
}
|
||||||
|
|
||||||
|
/// Return the zero-based index of the least significant set bit of i.
/// The CUDA and GNU branches yield -1 for i == 0 (ffs(0) - 1); the portable
/// fallback does the same.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_forward(unsigned i)
{
#if defined( __CUDA_ARCH__ )
  return __ffs(i) - 1;
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_ffs(i) - 1;
#elif defined( __INTEL_COMPILER )
  return _bit_scan_forward(i);
#else

  // No bit set: report -1 like the ffs-based branches above.
  if (i == 0u) return -1;

  // Walk a single-bit mask upwards until it hits a set bit.  The parentheses
  // around (i & t) matter: `i & t == 0` parses as `i & (t == 0)`.
  int r = 0;
  for (unsigned t = 1u; (i & t) == 0u; t = t << 1)
  {
    ++r;
  }
  return r;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Return the zero-based index of the most significant set bit of i.
/// The result for i == 0 depends on the branch: the CUDA branch computes
/// shift - __clz(0), the compiler-builtin branches leave it to the builtin,
/// and the portable fallback returns -1.
KOKKOS_FORCEINLINE_FUNCTION
int bit_scan_reverse(unsigned i)
{
  enum { shift = static_cast<int>(sizeof(unsigned)*CHAR_BIT - 1) };
#if defined( __CUDA_ARCH__ )
  return shift - __clz(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return shift - __builtin_clz(i);
#elif defined( __INTEL_COMPILER )
  return _bit_scan_reverse(i);
#else
  // No bit set: there is no index to report.
  if (i == 0u) return -1;

  // Walk a single-bit mask down from the top bit; r tracks the index of the
  // bit currently under the mask, so the value returned is a bit index (as in
  // the branches above), not a count of leading zeros.  The parentheses
  // around (i & t) matter: `i & t == 0` parses as `i & (t == 0)`.
  int r = shift;
  for (unsigned t = 1u << shift; (i & t) == 0u; t = t >> 1)
  {
    --r;
  }
  return r;
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
// count the bits set
|
||||||
|
/// Number of set bits in i.
/// Uses the CUDA / GNU / Intel intrinsic when one is available and otherwise
/// a branch-free parallel bit count.
KOKKOS_FORCEINLINE_FUNCTION
int popcount(unsigned i)
{
#if defined( __CUDA_ARCH__ )
  return __popc(i);
#elif defined( __GNUC__ ) || defined( __GNUG__ )
  return __builtin_popcount(i);
#elif defined ( __INTEL_COMPILER )
  return _popcnt32(i);
#else
  // Parallel bit count, see
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  const unsigned all_ones  = ~0u;
  const unsigned pairs     = all_ones/3u;        // alternating 01 pattern
  const unsigned nibbles   = all_ones/15u*3u;    // alternating 0011 pattern
  const unsigned bytes_low = all_ones/255u*15u;  // low nibble of every byte
  const unsigned byte_ones = all_ones/255u;      // value 1 in every byte

  unsigned v = i - ((i >> 1) & pairs);           // 2-bit partial sums
  v = (v & nibbles) + ((v >> 2) & nibbles);      // 4-bit partial sums
  v = (v + (v >> 4)) & bytes_low;                // 8-bit partial sums
  // The multiply accumulates all byte sums into the top byte.
  return (int)((v * byte_ones) >> (sizeof(unsigned) - 1) * CHAR_BIT);
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Bitset>
|
||||||
|
struct BitsetCount
|
||||||
|
{
|
||||||
|
typedef Bitset bitset_type;
|
||||||
|
typedef typename bitset_type::execution_space::execution_space execution_space;
|
||||||
|
typedef typename bitset_type::size_type size_type;
|
||||||
|
typedef size_type value_type;
|
||||||
|
|
||||||
|
bitset_type m_bitset;
|
||||||
|
|
||||||
|
BitsetCount( bitset_type const& bitset)
|
||||||
|
: m_bitset(bitset)
|
||||||
|
{}
|
||||||
|
|
||||||
|
size_type apply() const
|
||||||
|
{
|
||||||
|
size_type count = 0u;
|
||||||
|
parallel_reduce(m_bitset.m_blocks.dimension_0(), *this, count);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void init( value_type & count)
|
||||||
|
{
|
||||||
|
count = 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void join( volatile value_type & count, const volatile size_type & incr )
|
||||||
|
{
|
||||||
|
count += incr;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( size_type i, value_type & count) const
|
||||||
|
{
|
||||||
|
count += popcount(m_bitset.m_blocks[i]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}} //Kokkos::Impl
|
||||||
|
|
||||||
|
#endif // KOKKOS_BITSET_IMPL_HPP
|
||||||
|
|
||||||
195
lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
Executable file
195
lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp
Executable file
@ -0,0 +1,195 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
|
||||||
|
#define KOKKOS_FUNCTIONAL_IMPL_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||||
|
// domain. The author hereby disclaims copyright to this source code.
|
||||||
|
/// Assemble the i-th 32-bit block of p from four consecutive bytes, with the
/// lowest-addressed byte in the least significant position.
/// Reading byte by byte avoids an aliasing error which could cause errors
/// with forced inlining.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t getblock32 ( const uint8_t * p, int i )
{
  const uint8_t * const block = p + i*4;

  uint32_t word = (uint32_t)block[0];
  word |= (uint32_t)block[1] << 8;
  word |= (uint32_t)block[2] << 16;
  word |= (uint32_t)block[3] << 24;
  return word;
}
|
||||||
|
|
||||||
|
/// Rotate the 32-bit value x to the left by r bits.
/// The count is reduced modulo 32, so r == 0 (and any other count) is well
/// defined; for 0 < r < 32 the result is the usual (x << r) | (x >> (32 - r)).
KOKKOS_FORCEINLINE_FUNCTION
uint32_t rotl32 ( uint32_t x, int8_t r )
{
  // A plain `x >> (32 - r)` shifts by 32 when r == 0, which is undefined
  // behaviour; masking both shift counts keeps them in [0, 31].
  const unsigned left = static_cast<unsigned>(r) & 31u;
  return (x << left) | (x >> ((32u - left) & 31u));
}
|
||||||
|
|
||||||
|
/// Final mixing step applied to the 32-bit hash state: alternating
/// xor-shift and multiply rounds.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t fmix32 ( uint32_t h )
{
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}
|
||||||
|
|
||||||
|
/// 32-bit MurmurHash3 of the len bytes starting at key, seeded with seed.
/// The input is consumed in 4-byte blocks, followed by the 0-3 trailing
/// bytes, and the state is finalized with the length and fmix32().
KOKKOS_INLINE_FUNCTION
uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
{
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint8_t * const bytes = (const uint8_t*)key;
  const int nblocks = len / 4;

  uint32_t hash = seed;

  //----------
  // body: mix in each complete 4-byte block

  for (int blk = 0; blk < nblocks; ++blk)
  {
    uint32_t k = getblock32(bytes,blk);

    k *= c1;
    k = rotl32(k,15);
    k *= c2;

    hash ^= k;
    hash = rotl32(hash,13);
    hash = hash*5+0xe6546b64;
  }

  //----------
  // tail: mix in the remaining 1-3 bytes, highest-indexed byte first

  const uint8_t * const tail = (const uint8_t*)(bytes + nblocks*4);
  const int remaining = len & 3;

  uint32_t k = 0;

  if (remaining == 3) k ^= tail[2] << 16;
  if (remaining >= 2) k ^= tail[1] << 8;
  if (remaining >= 1)
  {
    k ^= tail[0];
    k *= c1;
    k = rotl32(k,15);
    k *= c2;
    hash ^= k;
  }

  //----------
  // finalization

  hash ^= len;

  return fmix32(hash);
}
|
||||||
|
|
||||||
|
|
||||||
|
#if defined( __GNUC__ ) /* GNU C */ || \
|
||||||
|
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||||
|
defined( __clang__ )
|
||||||
|
|
||||||
|
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define KOKKOS_MAY_ALIAS
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
|
||||||
|
{
|
||||||
|
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||||
|
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||||
|
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||||
|
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
NUM_8 = sizeof(T),
|
||||||
|
NUM_16 = NUM_8 / 2,
|
||||||
|
NUM_32 = NUM_8 / 4,
|
||||||
|
NUM_64 = NUM_8 / 8
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
T const * const ptr;
|
||||||
|
T64 const * const ptr64;
|
||||||
|
T32 const * const ptr32;
|
||||||
|
T16 const * const ptr16;
|
||||||
|
T8 const * const ptr8;
|
||||||
|
} a = {a_ptr}, b = {b_ptr};
|
||||||
|
|
||||||
|
bool result = true;
|
||||||
|
|
||||||
|
for (int i=0; i < NUM_64; ++i) {
|
||||||
|
result = result && a.ptr64[i] == b.ptr64[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( NUM_64*2 < NUM_32 ) {
|
||||||
|
result = result && a.ptr32[NUM_64*2] == b.ptr32[NUM_64*2];
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( NUM_32*2 < NUM_16 ) {
|
||||||
|
result = result && a.ptr16[NUM_32*2] == b.ptr16[NUM_32*2];
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( NUM_16*2 < NUM_8 ) {
|
||||||
|
result = result && a.ptr8[NUM_16*2] == b.ptr8[NUM_16*2];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#undef KOKKOS_MAY_ALIAS
|
||||||
|
|
||||||
|
}} // namespace Kokkos::Impl
|
||||||
|
|
||||||
|
#endif //KOKKOS_FUNCTIONAL_IMPL_HPP
|
||||||
208
lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
Executable file
208
lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp
Executable file
@ -0,0 +1,208 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
|
||||||
|
#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||||
|
inline
|
||||||
|
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||||
|
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
||||||
|
typename Impl::enable_if< ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
||||||
|
{
|
||||||
|
return view ;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||||
|
inline
|
||||||
|
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||||
|
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view )
|
||||||
|
{
|
||||||
|
// Force copy:
|
||||||
|
//typedef Impl::ViewAssignment< Impl::ViewDefault > alloc ; // unused
|
||||||
|
typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type ;
|
||||||
|
|
||||||
|
typename staticcrsgraph_type::HostMirror tmp ;
|
||||||
|
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
|
||||||
|
|
||||||
|
// Allocation to match:
|
||||||
|
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
|
||||||
|
tmp.entries = create_mirror( view.entries );
|
||||||
|
|
||||||
|
|
||||||
|
// Deep copy:
|
||||||
|
deep_copy( tmp_row_map , view.row_map );
|
||||||
|
deep_copy( tmp.entries , view.entries );
|
||||||
|
|
||||||
|
return tmp ;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class DataType , class Arg1Type , class Arg2Type , typename SizeType >
|
||||||
|
inline
|
||||||
|
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
|
||||||
|
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view ,
|
||||||
|
typename Impl::enable_if< ! ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace >::type * = 0 )
|
||||||
|
{
|
||||||
|
return create_mirror( view );
|
||||||
|
}
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template< class StaticCrsGraphType , class InputSizeType >
|
||||||
|
inline
|
||||||
|
typename StaticCrsGraphType::staticcrsgraph_type
|
||||||
|
create_staticcrsgraph( const std::string & label ,
|
||||||
|
const std::vector< InputSizeType > & input )
|
||||||
|
{
|
||||||
|
typedef StaticCrsGraphType output_type ;
|
||||||
|
//typedef std::vector< InputSizeType > input_type ; // unused
|
||||||
|
|
||||||
|
typedef typename output_type::entries_type entries_type ;
|
||||||
|
|
||||||
|
typedef View< typename output_type::size_type [] ,
|
||||||
|
typename output_type::array_layout ,
|
||||||
|
typename output_type::execution_space > work_type ;
|
||||||
|
|
||||||
|
output_type output ;
|
||||||
|
|
||||||
|
// Create the row map:
|
||||||
|
|
||||||
|
const size_t length = input.size();
|
||||||
|
|
||||||
|
{
|
||||||
|
work_type row_work( "tmp" , length + 1 );
|
||||||
|
|
||||||
|
typename work_type::HostMirror row_work_host =
|
||||||
|
create_mirror_view( row_work );
|
||||||
|
|
||||||
|
size_t sum = 0 ;
|
||||||
|
row_work_host[0] = 0 ;
|
||||||
|
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||||
|
row_work_host[i+1] = sum += input[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
deep_copy( row_work , row_work_host );
|
||||||
|
|
||||||
|
output.entries = entries_type( label , sum );
|
||||||
|
output.row_map = row_work ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return output ;
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class StaticCrsGraphType , class InputSizeType >
|
||||||
|
inline
|
||||||
|
typename StaticCrsGraphType::staticcrsgraph_type
|
||||||
|
create_staticcrsgraph( const std::string & label ,
|
||||||
|
const std::vector< std::vector< InputSizeType > > & input )
|
||||||
|
{
|
||||||
|
typedef StaticCrsGraphType output_type ;
|
||||||
|
typedef typename output_type::entries_type entries_type ;
|
||||||
|
|
||||||
|
static_assert( entries_type::rank == 1
|
||||||
|
, "Graph entries view must be rank one" );
|
||||||
|
|
||||||
|
typedef View< typename output_type::size_type [] ,
|
||||||
|
typename output_type::array_layout ,
|
||||||
|
typename output_type::execution_space > work_type ;
|
||||||
|
|
||||||
|
output_type output ;
|
||||||
|
|
||||||
|
// Create the row map:
|
||||||
|
|
||||||
|
const size_t length = input.size();
|
||||||
|
|
||||||
|
{
|
||||||
|
work_type row_work( "tmp" , length + 1 );
|
||||||
|
|
||||||
|
typename work_type::HostMirror row_work_host =
|
||||||
|
create_mirror_view( row_work );
|
||||||
|
|
||||||
|
size_t sum = 0 ;
|
||||||
|
row_work_host[0] = 0 ;
|
||||||
|
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||||
|
row_work_host[i+1] = sum += input[i].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
deep_copy( row_work , row_work_host );
|
||||||
|
|
||||||
|
output.entries = entries_type( label , sum );
|
||||||
|
output.row_map = row_work ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill in the entries:
|
||||||
|
{
|
||||||
|
typename entries_type::HostMirror host_entries =
|
||||||
|
create_mirror_view( output.entries );
|
||||||
|
|
||||||
|
size_t sum = 0 ;
|
||||||
|
for ( size_t i = 0 ; i < length ; ++i ) {
|
||||||
|
for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
|
||||||
|
host_entries( sum ) = input[i][j] ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deep_copy( output.entries , host_entries );
|
||||||
|
}
|
||||||
|
|
||||||
|
return output ;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP */
|
||||||
|
|
||||||
101
lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
Executable file
101
lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
Executable file
@ -0,0 +1,101 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
/// Choose a hash-table size for a requested capacity.
///
/// Returns 0 when \p size is 0. Otherwise returns the first entry of the
/// ascending prime table below that is greater than or equal to \p size;
/// if \p size exceeds every entry, the largest entry is returned.
uint32_t find_hash_size(uint32_t size)
{
  if (size == 0u) {
    return 0u;
  }

  // these primes try to preserve randomness of hash
  static const uint32_t primes [] = {
    3, 7, 13, 23, 53, 97, 193, 389, 769, 1543,
    2237, 2423, 2617, 2797, 2999, 3167, 3359, 3539,
    3727, 3911, 4441, 4787, 5119, 5471, 5801, 6143, 6521, 6827,
    7177, 7517, 7853, 8887, 9587, 10243, 10937, 11617, 12289,
    12967, 13649, 14341, 15013, 15727,
    17749, 19121, 20479, 21859, 23209, 24593, 25939, 27329,
    28669, 30047, 31469, 35507, 38231, 40961, 43711, 46439,
    49157, 51893, 54617, 57347, 60077, 62801, 70583, 75619,
    80669, 85703, 90749, 95783, 100823, 105871, 110909, 115963,
    120997, 126031, 141157, 151237, 161323, 171401, 181499, 191579,
    201653, 211741, 221813, 231893, 241979, 252079,
    282311, 302483, 322649, 342803, 362969, 383143, 403301, 423457,
    443629, 463787, 483953, 504121, 564617, 604949, 645313, 685609,
    725939, 766273, 806609, 846931, 887261, 927587, 967919, 1008239,
    1123477, 1198397, 1273289, 1348177, 1423067, 1497983, 1572869,
    1647761, 1722667, 1797581, 1872461, 1947359, 2022253,
    2246953, 2396759, 2546543, 2696363, 2846161, 2995973, 3145739,
    3295541, 3445357, 3595117, 3744941, 3894707, 4044503,
    4493921, 4793501, 5093089, 5392679, 5692279, 5991883, 6291469,
    6591059, 6890641, 7190243, 7489829, 7789447, 8089033,
    8987807, 9586981, 10186177, 10785371, 11384539, 11983729,
    12582917, 13182109, 13781291, 14380469, 14979667, 15578861,
    16178053, 17895707, 19014187, 20132683, 21251141, 22369661,
    23488103, 24606583, 25725083, 26843549, 27962027, 29080529,
    30198989, 31317469, 32435981, 35791397, 38028379, 40265327,
    42502283, 44739259, 46976221, 49213237, 51450131, 53687099,
    55924061, 58161041, 60397993, 62634959, 64871921,
    71582857, 76056727, 80530643, 85004567, 89478503, 93952427,
    98426347, 102900263, 107374217, 111848111, 116322053, 120795971,
    125269877, 129743807, 143165587, 152113427, 161061283, 170009141,
    178956983, 187904819, 196852693, 205800547, 214748383, 223696237,
    232644089, 241591943, 250539763, 259487603, 268435399
  };

  const uint32_t * const table_end = primes + sizeof(primes)/sizeof(primes[0]);

  // Walk the table in ascending order and stop at the first prime that is
  // large enough to hold `size`.
  for (const uint32_t * candidate = primes; candidate != table_end; ++candidate) {
    if (size <= *candidate) {
      return *candidate;
    }
  }

  // Request is larger than every table entry: fall back to the largest one.
  return *(table_end - 1);
}
|
||||||
|
|
||||||
|
}} // namespace Kokkos::Impl
|
||||||
|
|
||||||
297
lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
Executable file
297
lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp
Executable file
@ -0,0 +1,297 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
|
||||||
|
#define KOKKOS_UNORDERED_MAP_IMPL_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <climits>
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
uint32_t find_hash_size( uint32_t size );
|
||||||
|
|
||||||
|
template <typename Map>
|
||||||
|
struct UnorderedMapRehash
|
||||||
|
{
|
||||||
|
typedef Map map_type;
|
||||||
|
typedef typename map_type::const_map_type const_map_type;
|
||||||
|
typedef typename map_type::execution_space execution_space;
|
||||||
|
typedef typename map_type::size_type size_type;
|
||||||
|
|
||||||
|
map_type m_dst;
|
||||||
|
const_map_type m_src;
|
||||||
|
|
||||||
|
UnorderedMapRehash( map_type const& dst, const_map_type const& src)
|
||||||
|
: m_dst(dst), m_src(src)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void apply() const
|
||||||
|
{
|
||||||
|
parallel_for(m_src.capacity(), *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(size_type i) const
|
||||||
|
{
|
||||||
|
if ( m_src.valid_at(i) )
|
||||||
|
m_dst.insert(m_src.key_at(i), m_src.value_at(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename UMap>
|
||||||
|
struct UnorderedMapErase
|
||||||
|
{
|
||||||
|
typedef UMap map_type;
|
||||||
|
typedef typename map_type::execution_space execution_space;
|
||||||
|
typedef typename map_type::size_type size_type;
|
||||||
|
typedef typename map_type::key_type key_type;
|
||||||
|
typedef typename map_type::impl_value_type value_type;
|
||||||
|
|
||||||
|
map_type m_map;
|
||||||
|
|
||||||
|
UnorderedMapErase( map_type const& map)
|
||||||
|
: m_map(map)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void apply() const
|
||||||
|
{
|
||||||
|
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( size_type i ) const
|
||||||
|
{
|
||||||
|
const size_type invalid_index = map_type::invalid_index;
|
||||||
|
|
||||||
|
size_type curr = m_map.m_hash_lists(i);
|
||||||
|
size_type next = invalid_index;
|
||||||
|
|
||||||
|
// remove erased head of the linked-list
|
||||||
|
while (curr != invalid_index && !m_map.valid_at(curr)) {
|
||||||
|
next = m_map.m_next_index[curr];
|
||||||
|
m_map.m_next_index[curr] = invalid_index;
|
||||||
|
m_map.m_keys[curr] = key_type();
|
||||||
|
if (m_map.is_set) m_map.m_values[curr] = value_type();
|
||||||
|
curr = next;
|
||||||
|
m_map.m_hash_lists(i) = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if the list is non-empty and the head is valid
|
||||||
|
if (curr != invalid_index && m_map.valid_at(curr) ) {
|
||||||
|
size_type prev = curr;
|
||||||
|
curr = m_map.m_next_index[prev];
|
||||||
|
|
||||||
|
while (curr != invalid_index) {
|
||||||
|
next = m_map.m_next_index[curr];
|
||||||
|
if (m_map.valid_at(curr)) {
|
||||||
|
prev = curr;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// remove curr from list
|
||||||
|
m_map.m_next_index[prev] = next;
|
||||||
|
m_map.m_next_index[curr] = invalid_index;
|
||||||
|
m_map.m_keys[curr] = key_type();
|
||||||
|
if (map_type::is_set) m_map.m_values[curr] = value_type();
|
||||||
|
}
|
||||||
|
curr = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename UMap>
|
||||||
|
struct UnorderedMapHistogram
|
||||||
|
{
|
||||||
|
typedef UMap map_type;
|
||||||
|
typedef typename map_type::execution_space execution_space;
|
||||||
|
typedef typename map_type::size_type size_type;
|
||||||
|
|
||||||
|
typedef View<int[100], execution_space> histogram_view;
|
||||||
|
typedef typename histogram_view::HostMirror host_histogram_view;
|
||||||
|
|
||||||
|
map_type m_map;
|
||||||
|
histogram_view m_length;
|
||||||
|
histogram_view m_distance;
|
||||||
|
histogram_view m_block_distance;
|
||||||
|
|
||||||
|
UnorderedMapHistogram( map_type const& map)
|
||||||
|
: m_map(map)
|
||||||
|
, m_length("UnorderedMap Histogram")
|
||||||
|
, m_distance("UnorderedMap Histogram")
|
||||||
|
, m_block_distance("UnorderedMap Histogram")
|
||||||
|
{}
|
||||||
|
|
||||||
|
void calculate()
|
||||||
|
{
|
||||||
|
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
Kokkos::deep_copy(m_length, 0);
|
||||||
|
Kokkos::deep_copy(m_distance, 0);
|
||||||
|
Kokkos::deep_copy(m_block_distance, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_length(std::ostream &out)
|
||||||
|
{
|
||||||
|
host_histogram_view host_copy = create_mirror_view(m_length);
|
||||||
|
Kokkos::deep_copy(host_copy, m_length);
|
||||||
|
|
||||||
|
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
||||||
|
{
|
||||||
|
out << host_copy[i] << " , ";
|
||||||
|
}
|
||||||
|
out << "\b\b\b " << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_distance(std::ostream &out)
|
||||||
|
{
|
||||||
|
host_histogram_view host_copy = create_mirror_view(m_distance);
|
||||||
|
Kokkos::deep_copy(host_copy, m_distance);
|
||||||
|
|
||||||
|
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
||||||
|
{
|
||||||
|
out << host_copy[i] << " , ";
|
||||||
|
}
|
||||||
|
out << "\b\b\b " << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_block_distance(std::ostream &out)
|
||||||
|
{
|
||||||
|
host_histogram_view host_copy = create_mirror_view(m_block_distance);
|
||||||
|
Kokkos::deep_copy(host_copy, m_block_distance);
|
||||||
|
|
||||||
|
for (int i=0, size = host_copy.dimension_0(); i<size; ++i)
|
||||||
|
{
|
||||||
|
out << host_copy[i] << " , ";
|
||||||
|
}
|
||||||
|
out << "\b\b\b " << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( size_type i ) const
|
||||||
|
{
|
||||||
|
const size_type invalid_index = map_type::invalid_index;
|
||||||
|
|
||||||
|
uint32_t length = 0;
|
||||||
|
size_type min_index = ~0u, max_index = 0;
|
||||||
|
for (size_type curr = m_map.m_hash_lists(i); curr != invalid_index; curr = m_map.m_next_index[curr]) {
|
||||||
|
++length;
|
||||||
|
min_index = (curr < min_index) ? curr : min_index;
|
||||||
|
max_index = (max_index < curr) ? curr : max_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_type distance = (0u < length) ? max_index - min_index : 0u;
|
||||||
|
size_type blocks = (0u < length) ? max_index/32u - min_index/32u : 0u;
|
||||||
|
|
||||||
|
// normalize data
|
||||||
|
length = length < 100u ? length : 99u;
|
||||||
|
distance = distance < 100u ? distance : 99u;
|
||||||
|
blocks = blocks < 100u ? blocks : 99u;
|
||||||
|
|
||||||
|
if (0u < length)
|
||||||
|
{
|
||||||
|
atomic_fetch_add( &m_length(length), 1);
|
||||||
|
atomic_fetch_add( &m_distance(distance), 1);
|
||||||
|
atomic_fetch_add( &m_block_distance(blocks), 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename UMap>
|
||||||
|
struct UnorderedMapPrint
|
||||||
|
{
|
||||||
|
typedef UMap map_type;
|
||||||
|
typedef typename map_type::execution_space execution_space;
|
||||||
|
typedef typename map_type::size_type size_type;
|
||||||
|
|
||||||
|
map_type m_map;
|
||||||
|
|
||||||
|
UnorderedMapPrint( map_type const& map)
|
||||||
|
: m_map(map)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void apply()
|
||||||
|
{
|
||||||
|
parallel_for(m_map.m_hash_lists.dimension_0(), *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( size_type i ) const
|
||||||
|
{
|
||||||
|
const size_type invalid_index = map_type::invalid_index;
|
||||||
|
|
||||||
|
uint32_t list = m_map.m_hash_lists(i);
|
||||||
|
for (size_type curr = list, ii=0; curr != invalid_index; curr = m_map.m_next_index[curr], ++ii) {
|
||||||
|
printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), m_map.value_at(curr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename DKey, typename DValue, typename SKey, typename SValue>
|
||||||
|
struct UnorderedMapCanAssign : public false_ {};
|
||||||
|
|
||||||
|
template <typename Key, typename Value>
|
||||||
|
struct UnorderedMapCanAssign<Key,Value,Key,Value> : public true_ {};
|
||||||
|
|
||||||
|
template <typename Key, typename Value>
|
||||||
|
struct UnorderedMapCanAssign<const Key,Value,Key,Value> : public true_ {};
|
||||||
|
|
||||||
|
template <typename Key, typename Value>
|
||||||
|
struct UnorderedMapCanAssign<const Key,const Value,Key,Value> : public true_ {};
|
||||||
|
|
||||||
|
template <typename Key, typename Value>
|
||||||
|
struct UnorderedMapCanAssign<const Key,const Value,const Key,Value> : public true_ {};
|
||||||
|
|
||||||
|
|
||||||
|
}} //Kokkos::Impl
|
||||||
|
|
||||||
|
#endif // KOKKOS_UNORDERED_MAP_IMPL_HPP
|
||||||
92
lib/kokkos/containers/unit_tests/Makefile
Executable file
92
lib/kokkos/containers/unit_tests/Makefile
Executable file
@ -0,0 +1,92 @@
|
|||||||
|
# Makefile for the Kokkos containers unit tests.
#
# One test executable is built per backend that Makefile.kokkos reports as
# enabled (KOKKOS_INTERNAL_USE_*): Cuda, Threads, OpenMP, Serial.
#
#   make            build every enabled test executable
#   make test       build and run every enabled test executable
#   make clean      remove object files and test executables

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

# Look for the test sources in the unit_tests directory.
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests

# First rule in the file, so this is what a bare `make` builds.
default: build_all
	echo "End Build"

# These targets name actions, not files. Declaring them phony keeps a stray
# file called e.g. `test` or `clean` from suppressing the recipe.
.PHONY: default build_all test clean test-cuda test-threads test-openmp test-serial

include $(KOKKOS_PATH)/Makefile.kokkos

# Toolchain selection: Cuda builds compile and link through nvcc_wrapper.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests

# Filled in below, one entry per enabled backend.
TEST_TARGETS =
TARGETS =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Threads
TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosContainers_UnitTest_Serial
TEST_TARGETS += test-serial
endif

# Link rules, one per backend executable.

KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda

KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads

KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP

KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial

# Run rules: each one builds its executable if needed, then executes it.

test-cuda: KokkosContainers_UnitTest_Cuda
	./KokkosContainers_UnitTest_Cuda

test-threads: KokkosContainers_UnitTest_Threads
	./KokkosContainers_UnitTest_Threads

test-openmp: KokkosContainers_UnitTest_OpenMP
	./KokkosContainers_UnitTest_OpenMP

test-serial: KokkosContainers_UnitTest_Serial
	./KokkosContainers_UnitTest_Serial

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

# gtest is compiled from its single amalgamated source file.
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
||||||
|
|
||||||
285
lib/kokkos/containers/unit_tests/TestBitset.hpp
Executable file
285
lib/kokkos/containers/unit_tests/TestBitset.hpp
Executable file
@ -0,0 +1,285 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_BITSET_HPP
|
||||||
|
#define KOKKOS_TEST_BITSET_HPP
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template <typename Bitset, bool Set>
|
||||||
|
struct TestBitset
|
||||||
|
{
|
||||||
|
typedef Bitset bitset_type;
|
||||||
|
typedef typename bitset_type::execution_space execution_space;
|
||||||
|
typedef uint32_t value_type;
|
||||||
|
|
||||||
|
bitset_type m_bitset;
|
||||||
|
|
||||||
|
TestBitset( bitset_type const& bitset)
|
||||||
|
: m_bitset(bitset)
|
||||||
|
{}
|
||||||
|
|
||||||
|
unsigned testit(unsigned collisions)
|
||||||
|
{
|
||||||
|
execution_space::fence();
|
||||||
|
|
||||||
|
unsigned count = 0;
|
||||||
|
Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, count);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void init( value_type & v ) const { v = 0; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void join( volatile value_type & dst, const volatile value_type & src ) const
|
||||||
|
{ dst += src; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(uint32_t i, value_type & v) const
|
||||||
|
{
|
||||||
|
i = i % m_bitset.size();
|
||||||
|
if (Set) {
|
||||||
|
if (m_bitset.set(i)) {
|
||||||
|
if (m_bitset.test(i)) ++v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (m_bitset.reset(i)) {
|
||||||
|
if (!m_bitset.test(i)) ++v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Bitset>
|
||||||
|
struct TestBitsetTest
|
||||||
|
{
|
||||||
|
typedef Bitset bitset_type;
|
||||||
|
typedef typename bitset_type::execution_space execution_space;
|
||||||
|
typedef uint32_t value_type;
|
||||||
|
|
||||||
|
bitset_type m_bitset;
|
||||||
|
|
||||||
|
TestBitsetTest( bitset_type const& bitset)
|
||||||
|
: m_bitset(bitset)
|
||||||
|
{}
|
||||||
|
|
||||||
|
unsigned testit()
|
||||||
|
{
|
||||||
|
execution_space::fence();
|
||||||
|
|
||||||
|
unsigned count = 0;
|
||||||
|
Kokkos::parallel_reduce( m_bitset.size(), *this, count);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void init( value_type & v ) const { v = 0; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void join( volatile value_type & dst, const volatile value_type & src ) const
|
||||||
|
{ dst += src; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(uint32_t i, value_type & v) const
|
||||||
|
{
|
||||||
|
if (m_bitset.test( i )) ++v;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Bitset, bool Set>
|
||||||
|
struct TestBitsetAny
|
||||||
|
{
|
||||||
|
typedef Bitset bitset_type;
|
||||||
|
typedef typename bitset_type::execution_space execution_space;
|
||||||
|
typedef uint32_t value_type;
|
||||||
|
|
||||||
|
bitset_type m_bitset;
|
||||||
|
|
||||||
|
TestBitsetAny( bitset_type const& bitset)
|
||||||
|
: m_bitset(bitset)
|
||||||
|
{}
|
||||||
|
|
||||||
|
unsigned testit()
|
||||||
|
{
|
||||||
|
execution_space::fence();
|
||||||
|
|
||||||
|
unsigned count = 0;
|
||||||
|
Kokkos::parallel_reduce( m_bitset.size(), *this, count);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void init( value_type & v ) const { v = 0; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void join( volatile value_type & dst, const volatile value_type & src ) const
|
||||||
|
{ dst += src; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(uint32_t i, value_type & v) const
|
||||||
|
{
|
||||||
|
bool result = false;
|
||||||
|
unsigned attempts = 0;
|
||||||
|
uint32_t hint = (i >> 4) << 4;
|
||||||
|
while (attempts < m_bitset.max_hint()) {
|
||||||
|
if (Set) {
|
||||||
|
Kokkos::tie(result, hint) = m_bitset.find_any_unset_near(hint, i);
|
||||||
|
if (result && m_bitset.set(hint)) {
|
||||||
|
++v;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (!result) {
|
||||||
|
++attempts;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Kokkos::tie(result, hint) = m_bitset.find_any_set_near(hint, i);
|
||||||
|
if (result && m_bitset.reset(hint)) {
|
||||||
|
++v;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (!result) {
|
||||||
|
++attempts;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
void test_bitset()
|
||||||
|
{
|
||||||
|
typedef Kokkos::Bitset< Device > bitset_type;
|
||||||
|
typedef Kokkos::ConstBitset< Device > const_bitset_type;
|
||||||
|
|
||||||
|
//unsigned test_sizes[] = { 0u, 1000u, 1u<<14, 1u<<16, 10000001 };
|
||||||
|
unsigned test_sizes[] = { 1000u, 1u<<14, 1u<<16, 10000001 };
|
||||||
|
|
||||||
|
for (int i=0, end = sizeof(test_sizes)/sizeof(unsigned); i<end; ++i) {
|
||||||
|
|
||||||
|
//std::cout << "Bitset " << test_sizes[i] << std::endl;
|
||||||
|
|
||||||
|
bitset_type bitset(test_sizes[i]);
|
||||||
|
|
||||||
|
//std::cout << " Check inital count " << std::endl;
|
||||||
|
// nothing should be set
|
||||||
|
{
|
||||||
|
Impl::TestBitsetTest< bitset_type > f(bitset);
|
||||||
|
uint32_t count = f.testit();
|
||||||
|
EXPECT_EQ(0u, count);
|
||||||
|
EXPECT_EQ(count, bitset.count());
|
||||||
|
}
|
||||||
|
|
||||||
|
//std::cout << " Check set() " << std::endl;
|
||||||
|
bitset.set();
|
||||||
|
// everything should be set
|
||||||
|
{
|
||||||
|
Impl::TestBitsetTest< const_bitset_type > f(bitset);
|
||||||
|
uint32_t count = f.testit();
|
||||||
|
EXPECT_EQ(bitset.size(), count);
|
||||||
|
EXPECT_EQ(count, bitset.count());
|
||||||
|
}
|
||||||
|
|
||||||
|
//std::cout << " Check reset() " << std::endl;
|
||||||
|
bitset.reset();
|
||||||
|
EXPECT_EQ(0u, bitset.count());
|
||||||
|
|
||||||
|
//std::cout << " Check set(i) " << std::endl;
|
||||||
|
// test setting bits
|
||||||
|
{
|
||||||
|
Impl::TestBitset< bitset_type, true > f(bitset);
|
||||||
|
uint32_t count = f.testit(10u);
|
||||||
|
EXPECT_EQ( bitset.size(), bitset.count());
|
||||||
|
EXPECT_EQ( bitset.size(), count );
|
||||||
|
}
|
||||||
|
|
||||||
|
//std::cout << " Check reset(i) " << std::endl;
|
||||||
|
// test resetting bits
|
||||||
|
{
|
||||||
|
Impl::TestBitset< bitset_type, false > f(bitset);
|
||||||
|
uint32_t count = f.testit(10u);
|
||||||
|
EXPECT_EQ( bitset.size(), count);
|
||||||
|
EXPECT_EQ( 0u, bitset.count() );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//std::cout << " Check find_any_set(i) " << std::endl;
|
||||||
|
// test setting any bits
|
||||||
|
{
|
||||||
|
Impl::TestBitsetAny< bitset_type, true > f(bitset);
|
||||||
|
uint32_t count = f.testit();
|
||||||
|
EXPECT_EQ( bitset.size(), bitset.count());
|
||||||
|
EXPECT_EQ( bitset.size(), count );
|
||||||
|
}
|
||||||
|
|
||||||
|
//std::cout << " Check find_any_unset(i) " << std::endl;
|
||||||
|
// test resetting any bits
|
||||||
|
{
|
||||||
|
Impl::TestBitsetAny< bitset_type, false > f(bitset);
|
||||||
|
uint32_t count = f.testit();
|
||||||
|
EXPECT_EQ( bitset.size(), count);
|
||||||
|
EXPECT_EQ( 0u, bitset.count() );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif //KOKKOS_TEST_BITSET_HPP
|
||||||
|
|
||||||
264
lib/kokkos/containers/unit_tests/TestComplex.hpp
Executable file
264
lib/kokkos/containers/unit_tests/TestComplex.hpp
Executable file
@ -0,0 +1,264 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_COMPLEX_HPP
|
||||||
|
#define KOKKOS_TEST_COMPLEX_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Complex.hpp>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
template <typename RealType>
|
||||||
|
void testComplexConstructors () {
|
||||||
|
typedef Kokkos::complex<RealType> complex_type;
|
||||||
|
|
||||||
|
complex_type z1;
|
||||||
|
complex_type z2 (0.0, 0.0);
|
||||||
|
complex_type z3 (1.0, 0.0);
|
||||||
|
complex_type z4 (0.0, 1.0);
|
||||||
|
complex_type z5 (-1.0, -2.0);
|
||||||
|
|
||||||
|
ASSERT_TRUE( z1 == z2 );
|
||||||
|
ASSERT_TRUE( z1 != z3 );
|
||||||
|
ASSERT_TRUE( z1 != z4 );
|
||||||
|
ASSERT_TRUE( z1 != z5 );
|
||||||
|
|
||||||
|
ASSERT_TRUE( z2 != z3 );
|
||||||
|
ASSERT_TRUE( z2 != z4 );
|
||||||
|
ASSERT_TRUE( z2 != z5 );
|
||||||
|
|
||||||
|
ASSERT_TRUE( z3 != z4 );
|
||||||
|
ASSERT_TRUE( z3 != z5 );
|
||||||
|
|
||||||
|
complex_type z6 (-1.0, -2.0);
|
||||||
|
ASSERT_TRUE( z5 == z6 );
|
||||||
|
|
||||||
|
// Make sure that complex has value semantics, in particular, that
|
||||||
|
// equality tests use values and not pointers, so that
|
||||||
|
// reassignment actually changes the value.
|
||||||
|
z1 = complex_type (-3.0, -4.0);
|
||||||
|
ASSERT_TRUE( z1.real () == -3.0 );
|
||||||
|
ASSERT_TRUE( z1.imag () == -4.0 );
|
||||||
|
ASSERT_TRUE( z1 != z2 );
|
||||||
|
|
||||||
|
complex_type z7 (1.0);
|
||||||
|
ASSERT_TRUE( z3 == z7 );
|
||||||
|
ASSERT_TRUE( z7 == 1.0 );
|
||||||
|
ASSERT_TRUE( z7 != -1.0 );
|
||||||
|
|
||||||
|
z7 = complex_type (5.0);
|
||||||
|
ASSERT_TRUE( z7.real () == 5.0 );
|
||||||
|
ASSERT_TRUE( z7.imag () == 0.0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename RealType>
|
||||||
|
void testPlus () {
|
||||||
|
typedef Kokkos::complex<RealType> complex_type;
|
||||||
|
|
||||||
|
complex_type z1 (1.0, -1.0);
|
||||||
|
complex_type z2 (-1.0, 1.0);
|
||||||
|
complex_type z3 = z1 + z2;
|
||||||
|
ASSERT_TRUE( z3 == complex_type (0.0, 0.0) );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename RealType>
|
||||||
|
void testMinus () {
|
||||||
|
typedef Kokkos::complex<RealType> complex_type;
|
||||||
|
|
||||||
|
// Test binary minus.
|
||||||
|
complex_type z1 (1.0, -1.0);
|
||||||
|
complex_type z2 (-1.0, 1.0);
|
||||||
|
complex_type z3 = z1 - z2;
|
||||||
|
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
|
||||||
|
|
||||||
|
// Test unary minus.
|
||||||
|
complex_type z4 (3.0, -4.0);
|
||||||
|
ASSERT_TRUE( -z1 == complex_type (-3.0, 4.0) );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename RealType>
|
||||||
|
void testTimes () {
|
||||||
|
typedef Kokkos::complex<RealType> complex_type;
|
||||||
|
|
||||||
|
complex_type z1 (1.0, -1.0);
|
||||||
|
complex_type z2 (-1.0, 1.0);
|
||||||
|
complex_type z3 = z1 - z2;
|
||||||
|
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
|
||||||
|
|
||||||
|
// Test unary minus.
|
||||||
|
complex_type z4 (3.0, -4.0);
|
||||||
|
ASSERT_TRUE( z4 == complex_type (3.0, -4.0) );
|
||||||
|
ASSERT_TRUE( -z4 == complex_type (-3.0, 4.0) );
|
||||||
|
ASSERT_TRUE( z4 == -complex_type (-3.0, 4.0) );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename RealType>
|
||||||
|
void testDivide () {
|
||||||
|
typedef Kokkos::complex<RealType> complex_type;
|
||||||
|
|
||||||
|
// Test division of a complex number by a real number.
|
||||||
|
complex_type z1 (1.0, -1.0);
|
||||||
|
complex_type z2 (1.0 / 2.0, -1.0 / 2.0);
|
||||||
|
ASSERT_TRUE( z1 / 2.0 == z2 );
|
||||||
|
|
||||||
|
// (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i))
|
||||||
|
// (-1+2i)(1+i) == -3 + i
|
||||||
|
complex_type z3 (-1.0, 2.0);
|
||||||
|
complex_type z4 (1.0, -1.0);
|
||||||
|
complex_type z5 (-3.0, 1.0);
|
||||||
|
ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 );
|
||||||
|
|
||||||
|
// Test division of a complex number by a complex number.
|
||||||
|
// This assumes that RealType is a floating-point type.
|
||||||
|
complex_type z6 (Kokkos::real (z5) / 2.0,
|
||||||
|
Kokkos::imag (z5) / 2.0);
|
||||||
|
|
||||||
|
complex_type z7 = z3 / z4;
|
||||||
|
ASSERT_TRUE( z7 == z6 );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename RealType>
|
||||||
|
void testOutsideKernel () {
|
||||||
|
testComplexConstructors<RealType> ();
|
||||||
|
testPlus<RealType> ();
|
||||||
|
testTimes<RealType> ();
|
||||||
|
testDivide<RealType> ();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename RealType, typename Device>
|
||||||
|
void testCreateView () {
|
||||||
|
typedef Kokkos::complex<RealType> complex_type;
|
||||||
|
Kokkos::View<complex_type*, Device> x ("x", 10);
|
||||||
|
ASSERT_TRUE( x.dimension_0 () == 10 );
|
||||||
|
|
||||||
|
// Test that View assignment works.
|
||||||
|
Kokkos::View<complex_type*, Device> x_nonconst = x;
|
||||||
|
Kokkos::View<const complex_type*, Device> x_const = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename RealType, typename Device>
|
||||||
|
class Fill {
|
||||||
|
public:
|
||||||
|
typedef typename Device::execution_space execution_space;
|
||||||
|
|
||||||
|
typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type;
|
||||||
|
typedef typename view_type::size_type size_type;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator () (const size_type i) const {
|
||||||
|
x_(i) = val_;
|
||||||
|
}
|
||||||
|
|
||||||
|
Fill (const view_type& x, const Kokkos::complex<RealType>& val) :
|
||||||
|
x_ (x), val_ (val)
|
||||||
|
{}
|
||||||
|
|
||||||
|
private:
|
||||||
|
view_type x_;
|
||||||
|
const Kokkos::complex<RealType> val_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename RealType, typename Device>
|
||||||
|
class Sum {
|
||||||
|
public:
|
||||||
|
typedef typename Device::execution_space execution_space;
|
||||||
|
|
||||||
|
typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type;
|
||||||
|
typedef typename view_type::size_type size_type;
|
||||||
|
typedef Kokkos::complex<RealType> value_type;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator () (const size_type i, Kokkos::complex<RealType>& sum) const {
|
||||||
|
sum += x_(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
Sum (const view_type& x) : x_ (x) {}
|
||||||
|
|
||||||
|
private:
|
||||||
|
view_type x_;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename RealType, typename Device>
|
||||||
|
void testInsideKernel () {
|
||||||
|
typedef Kokkos::complex<RealType> complex_type;
|
||||||
|
typedef Kokkos::View<complex_type*, Device> view_type;
|
||||||
|
typedef typename view_type::size_type size_type;
|
||||||
|
|
||||||
|
const size_type N = 1000;
|
||||||
|
view_type x ("x", N);
|
||||||
|
ASSERT_TRUE( x.dimension_0 () == N );
|
||||||
|
|
||||||
|
// Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) {
|
||||||
|
// result += x[i];
|
||||||
|
// });
|
||||||
|
|
||||||
|
Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0)));
|
||||||
|
|
||||||
|
complex_type sum;
|
||||||
|
Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum);
|
||||||
|
|
||||||
|
ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 );
|
||||||
|
}
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
void testComplex ()
|
||||||
|
{
|
||||||
|
Impl::testOutsideKernel<float> ();
|
||||||
|
Impl::testOutsideKernel<double> ();
|
||||||
|
|
||||||
|
Impl::testCreateView<float, Device> ();
|
||||||
|
Impl::testCreateView<double, Device> ();
|
||||||
|
|
||||||
|
Impl::testInsideKernel<float, Device> ();
|
||||||
|
Impl::testInsideKernel<double, Device> ();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif // KOKKOS_TEST_COMPLEX_HPP
|
||||||
206
lib/kokkos/containers/unit_tests/TestCuda.cpp
Executable file
206
lib/kokkos/containers/unit_tests/TestCuda.cpp
Executable file
@ -0,0 +1,206 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_Bitset.hpp>
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
#include <Kokkos_Vector.hpp>
|
||||||
|
|
||||||
|
#include <TestBitset.hpp>
|
||||||
|
#include <TestUnorderedMap.hpp>
|
||||||
|
#include <TestStaticCrsGraph.hpp>
|
||||||
|
#include <TestVector.hpp>
|
||||||
|
#include <TestDualView.hpp>
|
||||||
|
#include <TestSegmentedView.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
class cuda : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
Kokkos::HostSpace::execution_space::initialize();
|
||||||
|
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||||
|
}
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::Cuda::finalize();
|
||||||
|
Kokkos::HostSpace::execution_space::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Run both StaticCrsGraph test drivers on the Cuda execution space.
TEST_F( cuda , staticcrsgraph )
{
  typedef Kokkos::Cuda device_type;

  TestStaticCrsGraph::run_test_graph< device_type >();
  TestStaticCrsGraph::run_test_graph2< device_type >();
}
|
||||||
|
|
||||||
|
|
||||||
|
void cuda_test_insert_close( uint32_t num_nodes
|
||||||
|
, uint32_t num_inserts
|
||||||
|
, uint32_t num_duplicates
|
||||||
|
)
|
||||||
|
{
|
||||||
|
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_insert_far( uint32_t num_nodes
|
||||||
|
, uint32_t num_inserts
|
||||||
|
, uint32_t num_duplicates
|
||||||
|
)
|
||||||
|
{
|
||||||
|
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_failed_insert( uint32_t num_nodes )
|
||||||
|
{
|
||||||
|
test_failed_insert< Kokkos::Cuda >( num_nodes );
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_deep_copy( uint32_t num_nodes )
|
||||||
|
{
|
||||||
|
test_deep_copy< Kokkos::Cuda >( num_nodes );
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_vector_combinations(unsigned int size)
|
||||||
|
{
|
||||||
|
test_vector_combinations<int,Kokkos::Cuda>(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_dualview_combinations(unsigned int size)
|
||||||
|
{
|
||||||
|
test_dualview_combinations<int,Kokkos::Cuda>(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_segmented_view(unsigned int size)
|
||||||
|
{
|
||||||
|
test_segmented_view<double,Kokkos::Cuda>(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_test_bitset()
|
||||||
|
{
|
||||||
|
test_bitset<Kokkos::Cuda>();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*TEST_F( cuda, bitset )
|
||||||
|
{
|
||||||
|
cuda_test_bitset();
|
||||||
|
}*/
|
||||||
|
|
||||||
|
#define CUDA_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat ) \
|
||||||
|
TEST_F( cuda, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
cuda_test_insert_##name(num_nodes,num_inserts,num_duplicates); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_FAILED_INSERT_TEST( num_nodes, repeat ) \
|
||||||
|
TEST_F( cuda, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
cuda_test_failed_insert(num_nodes); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_ASSIGNEMENT_TEST( num_nodes, repeat ) \
|
||||||
|
TEST_F( cuda, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
cuda_test_assignment_operators(num_nodes); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_DEEP_COPY( num_nodes, repeat ) \
|
||||||
|
TEST_F( cuda, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
cuda_test_deep_copy(num_nodes); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_VECTOR_COMBINE_TEST( size ) \
|
||||||
|
TEST_F( cuda, vector_combination##size##x) { \
|
||||||
|
cuda_test_vector_combinations(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_DUALVIEW_COMBINE_TEST( size ) \
|
||||||
|
TEST_F( cuda, dualview_combination##size##x) { \
|
||||||
|
cuda_test_dualview_combinations(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CUDA_SEGMENTEDVIEW_TEST( size ) \
|
||||||
|
TEST_F( cuda, segmentedview_##size##x) { \
|
||||||
|
cuda_test_segmented_view(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
CUDA_DUALVIEW_COMBINE_TEST( 10 )
|
||||||
|
CUDA_VECTOR_COMBINE_TEST( 10 )
|
||||||
|
CUDA_VECTOR_COMBINE_TEST( 3057 )
|
||||||
|
|
||||||
|
|
||||||
|
CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
|
||||||
|
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)
|
||||||
|
CUDA_DEEP_COPY( 10000, 1 )
|
||||||
|
CUDA_FAILED_INSERT_TEST( 10000, 1000 )
|
||||||
|
CUDA_SEGMENTEDVIEW_TEST( 200 )
|
||||||
|
|
||||||
|
|
||||||
|
#undef CUDA_INSERT_TEST
|
||||||
|
#undef CUDA_FAILED_INSERT_TEST
|
||||||
|
#undef CUDA_ASSIGNEMENT_TEST
|
||||||
|
#undef CUDA_DEEP_COPY
|
||||||
|
#undef CUDA_VECTOR_COMBINE_TEST
|
||||||
|
#undef CUDA_DUALVIEW_COMBINE_TEST
|
||||||
|
#undef CUDA_SEGMENTEDVIEW_TEST
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
||||||
|
|
||||||
121
lib/kokkos/containers/unit_tests/TestDualView.hpp
Executable file
121
lib/kokkos/containers/unit_tests/TestDualView.hpp
Executable file
@ -0,0 +1,121 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_DUALVIEW_HPP
|
||||||
|
#define KOKKOS_TEST_DUALVIEW_HPP
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template <typename Scalar, class Device>
|
||||||
|
struct test_dualview_combinations
|
||||||
|
{
|
||||||
|
typedef test_dualview_combinations<Scalar,Device> self_type;
|
||||||
|
|
||||||
|
typedef Scalar scalar_type;
|
||||||
|
typedef Device execution_space;
|
||||||
|
|
||||||
|
Scalar reference;
|
||||||
|
Scalar result;
|
||||||
|
|
||||||
|
template <typename ViewType>
|
||||||
|
Scalar run_me(unsigned int n,unsigned int m){
|
||||||
|
if(n<10) n = 10;
|
||||||
|
if(m<3) m = 3;
|
||||||
|
ViewType a("A",n,m);
|
||||||
|
|
||||||
|
Kokkos::deep_copy( a.d_view , 1 );
|
||||||
|
|
||||||
|
a.template modify<typename ViewType::execution_space>();
|
||||||
|
a.template sync<typename ViewType::host_mirror_space>();
|
||||||
|
|
||||||
|
a.h_view(5,1) = 3;
|
||||||
|
a.h_view(6,1) = 4;
|
||||||
|
a.h_view(7,2) = 5;
|
||||||
|
a.template modify<typename ViewType::host_mirror_space>();
|
||||||
|
ViewType b = Kokkos::subview(a,std::pair<unsigned int, unsigned int>(6,9),std::pair<unsigned int, unsigned int>(0,1));
|
||||||
|
a.template sync<typename ViewType::execution_space>();
|
||||||
|
b.template modify<typename ViewType::execution_space>();
|
||||||
|
|
||||||
|
Kokkos::deep_copy( b.d_view , 2 );
|
||||||
|
|
||||||
|
a.template sync<typename ViewType::host_mirror_space>();
|
||||||
|
Scalar count = 0;
|
||||||
|
for(unsigned int i = 0; i<a.d_view.dimension_0(); i++)
|
||||||
|
for(unsigned int j = 0; j<a.d_view.dimension_1(); j++)
|
||||||
|
count += a.h_view(i,j);
|
||||||
|
return count - a.d_view.dimension_0()*a.d_view.dimension_1()-2-4-3*2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
test_dualview_combinations(unsigned int size)
|
||||||
|
{
|
||||||
|
result = run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >(size,3);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Scalar, typename Device>
|
||||||
|
void test_dualview_combinations(unsigned int size)
|
||||||
|
{
|
||||||
|
Impl::test_dualview_combinations<Scalar,Device> test(size);
|
||||||
|
ASSERT_EQ( test.result,0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif // KOKKOS_TEST_DUALVIEW_HPP
|
||||||
162
lib/kokkos/containers/unit_tests/TestOpenMP.cpp
Executable file
162
lib/kokkos/containers/unit_tests/TestOpenMP.cpp
Executable file
@ -0,0 +1,162 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_Bitset.hpp>
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
#include <Kokkos_Vector.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
#include <TestBitset.hpp>
|
||||||
|
#include <TestUnorderedMap.hpp>
|
||||||
|
#include <TestStaticCrsGraph.hpp>
|
||||||
|
#include <TestVector.hpp>
|
||||||
|
#include <TestDualView.hpp>
|
||||||
|
#include <TestSegmentedView.hpp>
|
||||||
|
#include <TestComplex.hpp>
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_OPENMP
|
||||||
|
class openmp : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
|
||||||
|
unsigned threads_count = 4 ;
|
||||||
|
|
||||||
|
if ( Kokkos::hwloc::available() ) {
|
||||||
|
threads_count = Kokkos::hwloc::get_available_numa_count() *
|
||||||
|
Kokkos::hwloc::get_available_cores_per_numa();
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::OpenMP::initialize( threads_count );
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::OpenMP::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Run the Kokkos::complex tests on the OpenMP execution space.
TEST_F( openmp, complex )
{
  typedef Kokkos::OpenMP device_type;
  testComplex<device_type> ();
}
|
||||||
|
|
||||||
|
// Run the Bitset tests on the OpenMP execution space.
TEST_F( openmp, bitset )
{
  typedef Kokkos::OpenMP device_type;
  test_bitset<device_type>();
}
|
||||||
|
|
||||||
|
// Run both StaticCrsGraph test drivers on the OpenMP execution space.
TEST_F( openmp , staticcrsgraph )
{
  typedef Kokkos::OpenMP device_type;

  TestStaticCrsGraph::run_test_graph< device_type >();
  TestStaticCrsGraph::run_test_graph2< device_type >();
}
|
||||||
|
|
||||||
|
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
|
||||||
|
TEST_F( openmp, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
test_insert<Kokkos::OpenMP>(num_nodes,num_inserts,num_duplicates, near); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_FAILED_INSERT_TEST( num_nodes, repeat ) \
|
||||||
|
TEST_F( openmp, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
test_failed_insert<Kokkos::OpenMP>(num_nodes); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_ASSIGNEMENT_TEST( num_nodes, repeat ) \
|
||||||
|
TEST_F( openmp, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
test_assignement_operators<Kokkos::OpenMP>(num_nodes); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_DEEP_COPY( num_nodes, repeat ) \
|
||||||
|
TEST_F( openmp, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
|
||||||
|
for (int i=0; i<repeat; ++i) \
|
||||||
|
test_deep_copy<Kokkos::OpenMP>(num_nodes); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_VECTOR_COMBINE_TEST( size ) \
|
||||||
|
TEST_F( openmp, vector_combination##size##x) { \
|
||||||
|
test_vector_combinations<int,Kokkos::OpenMP>(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_DUALVIEW_COMBINE_TEST( size ) \
|
||||||
|
TEST_F( openmp, dualview_combination##size##x) { \
|
||||||
|
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define OPENMP_SEGMENTEDVIEW_TEST( size ) \
|
||||||
|
TEST_F( openmp, segmentedview_##size##x) { \
|
||||||
|
test_segmented_view<double,Kokkos::OpenMP>(size); \
|
||||||
|
}
|
||||||
|
|
||||||
|
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
|
||||||
|
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
||||||
|
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
|
||||||
|
OPENMP_DEEP_COPY( 10000, 1 )
|
||||||
|
|
||||||
|
OPENMP_VECTOR_COMBINE_TEST( 10 )
|
||||||
|
OPENMP_VECTOR_COMBINE_TEST( 3057 )
|
||||||
|
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
|
||||||
|
OPENMP_SEGMENTEDVIEW_TEST( 10000 )
|
||||||
|
|
||||||
|
#undef OPENMP_INSERT_TEST
|
||||||
|
#undef OPENMP_FAILED_INSERT_TEST
|
||||||
|
#undef OPENMP_ASSIGNEMENT_TEST
|
||||||
|
#undef OPENMP_DEEP_COPY
|
||||||
|
#undef OPENMP_VECTOR_COMBINE_TEST
|
||||||
|
#undef OPENMP_DUALVIEW_COMBINE_TEST
|
||||||
|
#undef OPENMP_SEGMENTEDVIEW_TEST
|
||||||
|
#endif
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
708
lib/kokkos/containers/unit_tests/TestSegmentedView.hpp
Executable file
708
lib/kokkos/containers/unit_tests/TestSegmentedView.hpp
Executable file
@ -0,0 +1,708 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP
|
||||||
|
#define KOKKOS_TEST_SEGMENTEDVIEW_HPP
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||||
|
|
||||||
|
#include <Kokkos_SegmentedView.hpp>
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
|
||||||
|
struct GrowTest;
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 1> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
a.grow(team_member , team_idx+team_member.team_size());
|
||||||
|
value += team_idx + team_member.team_rank();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+team_member.team_rank()))
|
||||||
|
a(team_idx+team_member.team_rank()) = team_idx+team_member.team_rank();
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 2> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
a.grow(team_member , team_idx+ team_member.team_size());
|
||||||
|
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||||
|
value += team_idx + team_member.team_rank() + 13*k;
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) {
|
||||||
|
a(team_idx+ team_member.team_rank(),k) =
|
||||||
|
team_idx+ team_member.team_rank() + 13*k;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 3> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
a.grow(team_member , team_idx+ team_member.team_size());
|
||||||
|
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||||
|
value += team_idx + team_member.team_rank() + 13*k + 3*l;
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
a(team_idx+ team_member.team_rank(),k,l) =
|
||||||
|
team_idx+ team_member.team_rank() + 13*k + 3*l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 4> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
a.grow(team_member , team_idx+ team_member.team_size());
|
||||||
|
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||||
|
value += team_idx + team_member.team_rank() + 13*k + 3*l + 7*m;
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
a(team_idx+ team_member.team_rank(),k,l,m) =
|
||||||
|
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 5> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
a.grow(team_member , team_idx+ team_member.team_size());
|
||||||
|
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||||
|
value +=
|
||||||
|
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
a(team_idx+ team_member.team_rank(),k,l,m,n) =
|
||||||
|
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 6> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
a.grow(team_member , team_idx+ team_member.team_size());
|
||||||
|
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
||||||
|
value +=
|
||||||
|
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||||
|
a(team_idx+ team_member.team_rank(),k,l,m,n,o) =
|
||||||
|
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 7> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
a.grow(team_member , team_idx+ team_member.team_size());
|
||||||
|
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
||||||
|
for( typename ExecutionSpace::size_type p=0;p<4;p++)
|
||||||
|
value +=
|
||||||
|
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||||
|
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||||
|
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p) =
|
||||||
|
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct GrowTest<ViewType , ExecutionSpace , 8> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
GrowTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
a.grow(team_member , team_idx + team_member.team_size());
|
||||||
|
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
||||||
|
for( typename ExecutionSpace::size_type p=0;p<4;p++)
|
||||||
|
for( typename ExecutionSpace::size_type q=0;q<3;q++)
|
||||||
|
value +=
|
||||||
|
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||||
|
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||||
|
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
|
||||||
|
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q) =
|
||||||
|
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Primary template of the read-back functor. Only declared here; the
// definitions are the partial specializations selected by Rank, which
// defaults to the view type's own Rank constant.
template< typename ViewType , typename ExecutionSpace , int Rank = ViewType::Rank >
struct VerifyTest;
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 1> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
value += a(team_idx+ team_member.team_rank());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 2> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
value += a(team_idx+ team_member.team_rank(),k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 3> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
value += a(team_idx+ team_member.team_rank(),k,l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 4> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
value += a(team_idx+ team_member.team_rank(),k,l,m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 5> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
value += a(team_idx+ team_member.team_rank(),k,l,m,n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 6> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||||
|
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 7> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||||
|
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||||
|
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ViewType , class ExecutionSpace>
|
||||||
|
struct VerifyTest<ViewType , ExecutionSpace , 8> {
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
typedef typename Policy::member_type team_type;
|
||||||
|
typedef double value_type;
|
||||||
|
|
||||||
|
ViewType a;
|
||||||
|
|
||||||
|
VerifyTest(ViewType in):a(in) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (team_type team_member, double& value) const {
|
||||||
|
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||||
|
|
||||||
|
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||||
|
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||||
|
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||||
|
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||||
|
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||||
|
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||||
|
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||||
|
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||||
|
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
|
||||||
|
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Scalar, class ExecutionSpace>
|
||||||
|
struct test_segmented_view
|
||||||
|
{
|
||||||
|
typedef test_segmented_view<Scalar,ExecutionSpace> self_type;
|
||||||
|
|
||||||
|
typedef Scalar scalar_type;
|
||||||
|
typedef ExecutionSpace execution_space;
|
||||||
|
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||||
|
|
||||||
|
double result;
|
||||||
|
double reference;
|
||||||
|
|
||||||
|
template <class ViewType>
|
||||||
|
void run_me(ViewType a, int max_length){
|
||||||
|
const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) );
|
||||||
|
const int nteams = max_length/team_size;
|
||||||
|
|
||||||
|
reference = 0;
|
||||||
|
result = 0;
|
||||||
|
|
||||||
|
Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference);
|
||||||
|
Kokkos::fence();
|
||||||
|
Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result);
|
||||||
|
Kokkos::fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
test_segmented_view(unsigned int size,int rank)
|
||||||
|
{
|
||||||
|
reference = 0;
|
||||||
|
result = 0;
|
||||||
|
|
||||||
|
const int dim_1 = 7;
|
||||||
|
const int dim_2 = 3;
|
||||||
|
const int dim_3 = 2;
|
||||||
|
const int dim_4 = 3;
|
||||||
|
const int dim_5 = 2;
|
||||||
|
const int dim_6 = 4;
|
||||||
|
//const int dim_7 = 3;
|
||||||
|
|
||||||
|
if(rank==1) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view;
|
||||||
|
run_me< rank1_view >(rank1_view("Rank1",128,size), size);
|
||||||
|
}
|
||||||
|
if(rank==2) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view;
|
||||||
|
run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size);
|
||||||
|
}
|
||||||
|
if(rank==3) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2],Kokkos::LayoutRight,ExecutionSpace> rank3_view;
|
||||||
|
run_me< rank3_view >(rank3_view("Rank3",128,size), size);
|
||||||
|
}
|
||||||
|
if(rank==4) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view;
|
||||||
|
run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size);
|
||||||
|
}
|
||||||
|
if(rank==5) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view;
|
||||||
|
run_me< rank5_view >(rank5_view("Rank5",128,size), size);
|
||||||
|
}
|
||||||
|
if(rank==6) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view;
|
||||||
|
run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size);
|
||||||
|
}
|
||||||
|
if(rank==7) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view;
|
||||||
|
run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size);
|
||||||
|
}
|
||||||
|
if(rank==8) {
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view;
|
||||||
|
run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Scalar, class ExecutionSpace>
|
||||||
|
void test_segmented_view(unsigned int size)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type;
|
||||||
|
view_type a("A",128,size,7,3,2,3);
|
||||||
|
double reference;
|
||||||
|
|
||||||
|
Impl::GrowTest<view_type,ExecutionSpace> f(a);
|
||||||
|
|
||||||
|
const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f );
|
||||||
|
const int nteams = (size+team_size-1)/team_size;
|
||||||
|
|
||||||
|
Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference);
|
||||||
|
|
||||||
|
size_t real_size = ((size+127)/128)*128;
|
||||||
|
|
||||||
|
ASSERT_EQ(real_size,a.dimension_0());
|
||||||
|
ASSERT_EQ(7,a.dimension_1());
|
||||||
|
ASSERT_EQ(3,a.dimension_2());
|
||||||
|
ASSERT_EQ(2,a.dimension_3());
|
||||||
|
ASSERT_EQ(3,a.dimension_4());
|
||||||
|
ASSERT_EQ(2,a.dimension_5());
|
||||||
|
ASSERT_EQ(4,a.dimension_6());
|
||||||
|
ASSERT_EQ(3,a.dimension_7());
|
||||||
|
ASSERT_EQ(real_size,a.dimension(0));
|
||||||
|
ASSERT_EQ(7,a.dimension(1));
|
||||||
|
ASSERT_EQ(3,a.dimension(2));
|
||||||
|
ASSERT_EQ(2,a.dimension(3));
|
||||||
|
ASSERT_EQ(3,a.dimension(4));
|
||||||
|
ASSERT_EQ(2,a.dimension(5));
|
||||||
|
ASSERT_EQ(4,a.dimension(6));
|
||||||
|
ASSERT_EQ(3,a.dimension(7));
|
||||||
|
ASSERT_EQ(8,a.Rank);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8);
|
||||||
|
ASSERT_EQ(test.reference,test.result);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
namespace Test {

// No-op stand-in used when KOKKOS_USING_EXPERIMENTAL_VIEW is defined, so
// callers of test_segmented_view still compile. Placed in namespace Test so
// that it has the same qualified name as the real implementation.
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int ) {}

} // namespace Test
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */
|
||||||
|
|
||||||
158
lib/kokkos/containers/unit_tests/TestSerial.cpp
Executable file
158
lib/kokkos/containers/unit_tests/TestSerial.cpp
Executable file
@ -0,0 +1,158 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#if ! defined(KOKKOS_HAVE_SERIAL)
|
||||||
|
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <Kokkos_Bitset.hpp>
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
#include <Kokkos_Vector.hpp>
|
||||||
|
|
||||||
|
#include <TestBitset.hpp>
|
||||||
|
#include <TestUnorderedMap.hpp>
|
||||||
|
#include <TestStaticCrsGraph.hpp>
|
||||||
|
#include <TestVector.hpp>
|
||||||
|
#include <TestDualView.hpp>
|
||||||
|
#include <TestSegmentedView.hpp>
|
||||||
|
#include <TestComplex.hpp>
|
||||||
|
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
class serial : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase () {
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
Kokkos::Serial::initialize ();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase () {
|
||||||
|
Kokkos::Serial::finalize ();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Runs both StaticCrsGraph test drivers on the Serial execution space.
TEST_F( serial , staticcrsgraph )
{
  typedef Kokkos::Serial device_type;

  TestStaticCrsGraph::run_test_graph< device_type >();
  TestStaticCrsGraph::run_test_graph2< device_type >();
}
|
||||||
|
|
||||||
|
// Runs the complex-number test driver on the Serial execution space.
TEST_F( serial, complex )
{
  typedef Kokkos::Serial device_type;

  testComplex< device_type > ();
}
|
||||||
|
|
||||||
|
// Runs the bitset test driver on the Serial execution space.
TEST_F( serial, bitset )
{
  typedef Kokkos::Serial device_type;

  test_bitset< device_type > ();
}
|
||||||
|
|
||||||
|
// Generates a gtest case named
// UnorderedMap_insert_<name>_<num_nodes>_<num_inserts>_<num_duplicates>_<repeat>x
// that calls test_insert<Kokkos::Serial> `repeat` times with the given
// arguments.
#define SERIAL_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near )                          \
  TEST_F( serial, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) {    \
    for (int i = 0; i < repeat; ++i) {                                                                             \
      test_insert<Kokkos::Serial> (num_nodes, num_inserts, num_duplicates, near);                                  \
    }                                                                                                              \
  }
|
||||||
|
|
||||||
|
// Generates a `serial` fixture test named
//   UnorderedMap_failed_insert_<num_nodes>_<repeat>x
// whose body calls test_failed_insert<Kokkos::Serial>(num_nodes) `repeat`
// times in a row.
#define SERIAL_FAILED_INSERT_TEST( num_nodes, repeat ) \
  TEST_F( serial, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
    for (int pass = 0; pass < repeat; ++pass) { \
      test_failed_insert<Kokkos::Serial> (num_nodes); \
    } \
  }
|
||||||
|
|
||||||
|
// Generates a `serial` fixture test named
//   UnorderedMap_assignment_operators_<num_nodes>_<repeat>x
// whose body calls test_assignement_operators<Kokkos::Serial>(num_nodes)
// `repeat` times in a row.
// NOTE(review): this macro does not appear to be instantiated in this file,
// so test_assignement_operators is never actually referenced - confirm
// whether the helper still exists before using the macro.
#define SERIAL_ASSIGNEMENT_TEST( num_nodes, repeat ) \
  TEST_F( serial, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
    for (int pass = 0; pass < repeat; ++pass) { \
      test_assignement_operators<Kokkos::Serial> (num_nodes); \
    } \
  }
|
||||||
|
|
||||||
|
// Generates a `serial` fixture test named
//   UnorderedMap_deep_copy<num_nodes>_<repeat>x
// (no underscore between "deep_copy" and the node count) whose body calls
// test_deep_copy<Kokkos::Serial>(num_nodes) `repeat` times in a row.
#define SERIAL_DEEP_COPY( num_nodes, repeat ) \
  TEST_F( serial, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
    for (int pass = 0; pass < repeat; ++pass) { \
      test_deep_copy<Kokkos::Serial> (num_nodes); \
    } \
  }
|
||||||
|
|
||||||
|
// Generates a `serial` fixture test named vector_combination<size>x that
// calls test_vector_combinations<int,Kokkos::Serial>(size) once.
#define SERIAL_VECTOR_COMBINE_TEST( size ) \
  TEST_F( serial, vector_combination##size##x) \
  { \
    test_vector_combinations<int,Kokkos::Serial>(size); \
  }
|
||||||
|
|
||||||
|
// Generates a `serial` fixture test named dualview_combination<size>x that
// calls test_dualview_combinations<int,Kokkos::Serial>(size) once.
#define SERIAL_DUALVIEW_COMBINE_TEST( size ) \
  TEST_F( serial, dualview_combination##size##x) \
  { \
    test_dualview_combinations<int,Kokkos::Serial>(size); \
  }
|
||||||
|
|
||||||
|
// Generates a `serial` fixture test named segmentedview_<size>x that calls
// test_segmented_view<double,Kokkos::Serial>(size) once.
#define SERIAL_SEGMENTEDVIEW_TEST( size ) \
  TEST_F( serial, segmentedview_##size##x) \
  { \
    test_segmented_view<double,Kokkos::Serial>(size); \
  }
|
||||||
|
|
||||||
|
// UnorderedMap tests.
// Arguments: ( name, num_nodes, num_inserts, num_duplicates, repeat, near )
SERIAL_INSERT_TEST( close, 100000, 90000, 100, 500, true )
SERIAL_INSERT_TEST( far, 100000, 90000, 100, 500, false )
// Arguments: ( num_nodes, repeat )
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
SERIAL_DEEP_COPY( 10000, 1 )

// Vector, DualView and SegmentedView tests.  Argument: ( size )
SERIAL_VECTOR_COMBINE_TEST( 10 )
SERIAL_VECTOR_COMBINE_TEST( 3057 )
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
SERIAL_SEGMENTEDVIEW_TEST( 10000 )

// All tests are instantiated; remove the generator macros again.
#undef SERIAL_INSERT_TEST
#undef SERIAL_FAILED_INSERT_TEST
#undef SERIAL_ASSIGNEMENT_TEST
#undef SERIAL_DEEP_COPY
#undef SERIAL_VECTOR_COMBINE_TEST
#undef SERIAL_DUALVIEW_COMBINE_TEST
#undef SERIAL_SEGMENTEDVIEW_TEST
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif // KOKKOS_HAVE_SERIAL
|
||||||
|
|
||||||
|
|
||||||
149
lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
Executable file
149
lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp
Executable file
@ -0,0 +1,149 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <Kokkos_StaticCrsGraph.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace TestStaticCrsGraph {
|
||||||
|
|
||||||
|
template< class Space >
|
||||||
|
void run_test_graph()
|
||||||
|
{
|
||||||
|
typedef Kokkos::StaticCrsGraph< unsigned , Space > dView ;
|
||||||
|
typedef typename dView::HostMirror hView ;
|
||||||
|
|
||||||
|
const unsigned LENGTH = 1000 ;
|
||||||
|
dView dx ;
|
||||||
|
hView hx ;
|
||||||
|
|
||||||
|
std::vector< std::vector< int > > graph( LENGTH );
|
||||||
|
|
||||||
|
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||||
|
graph[i].reserve(8);
|
||||||
|
for ( size_t j = 0 ; j < 8 ; ++j ) {
|
||||||
|
graph[i].push_back( i + j * 3 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dx = Kokkos::create_staticcrsgraph<dView>( "dx" , graph );
|
||||||
|
hx = Kokkos::create_mirror( dx );
|
||||||
|
|
||||||
|
ASSERT_EQ( hx.row_map.dimension_0() - 1 , LENGTH );
|
||||||
|
|
||||||
|
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||||
|
const size_t begin = hx.row_map[i];
|
||||||
|
const size_t n = hx.row_map[i+1] - begin ;
|
||||||
|
ASSERT_EQ( n , graph[i].size() );
|
||||||
|
for ( size_t j = 0 ; j < n ; ++j ) {
|
||||||
|
ASSERT_EQ( (int) hx.entries( j + begin ) , graph[i][j] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class Space >
|
||||||
|
void run_test_graph2()
|
||||||
|
{
|
||||||
|
typedef Kokkos::StaticCrsGraph< unsigned[3] , Space > dView ;
|
||||||
|
typedef typename dView::HostMirror hView ;
|
||||||
|
|
||||||
|
const unsigned LENGTH = 10 ;
|
||||||
|
|
||||||
|
std::vector< size_t > sizes( LENGTH );
|
||||||
|
|
||||||
|
size_t total_length = 0 ;
|
||||||
|
|
||||||
|
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||||
|
total_length += ( sizes[i] = 6 + i % 4 );
|
||||||
|
}
|
||||||
|
|
||||||
|
dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
|
||||||
|
hView hx = Kokkos::create_mirror( dx );
|
||||||
|
hView mx = Kokkos::create_mirror( dx );
|
||||||
|
|
||||||
|
ASSERT_EQ( (size_t) dx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
||||||
|
ASSERT_EQ( (size_t) hx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
||||||
|
ASSERT_EQ( (size_t) mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
||||||
|
|
||||||
|
ASSERT_EQ( (size_t) dx.entries.dimension_0() , (size_t) total_length );
|
||||||
|
ASSERT_EQ( (size_t) hx.entries.dimension_0() , (size_t) total_length );
|
||||||
|
ASSERT_EQ( (size_t) mx.entries.dimension_0() , (size_t) total_length );
|
||||||
|
|
||||||
|
ASSERT_EQ( (size_t) dx.entries.dimension_1() , (size_t) 3 );
|
||||||
|
ASSERT_EQ( (size_t) hx.entries.dimension_1() , (size_t) 3 );
|
||||||
|
ASSERT_EQ( (size_t) mx.entries.dimension_1() , (size_t) 3 );
|
||||||
|
|
||||||
|
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||||
|
const size_t entry_begin = hx.row_map[i];
|
||||||
|
const size_t entry_end = hx.row_map[i+1];
|
||||||
|
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
|
||||||
|
hx.entries(j,0) = j + 1 ;
|
||||||
|
hx.entries(j,1) = j + 2 ;
|
||||||
|
hx.entries(j,2) = j + 3 ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::deep_copy( dx.entries , hx.entries );
|
||||||
|
Kokkos::deep_copy( mx.entries , dx.entries );
|
||||||
|
|
||||||
|
ASSERT_EQ( mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
|
||||||
|
|
||||||
|
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||||
|
const size_t entry_begin = mx.row_map[i];
|
||||||
|
const size_t entry_end = mx.row_map[i+1];
|
||||||
|
ASSERT_EQ( ( entry_end - entry_begin ) , sizes[i] );
|
||||||
|
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
|
||||||
|
ASSERT_EQ( (size_t) mx.entries( j , 0 ) , ( j + 1 ) );
|
||||||
|
ASSERT_EQ( (size_t) mx.entries( j , 1 ) , ( j + 2 ) );
|
||||||
|
ASSERT_EQ( (size_t) mx.entries( j , 2 ) , ( j + 3 ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} /* namespace TestStaticCrsGraph */
|
||||||
|
|
||||||
|
|
||||||
168
lib/kokkos/containers/unit_tests/TestThreads.cpp
Executable file
168
lib/kokkos/containers/unit_tests/TestThreads.cpp
Executable file
@ -0,0 +1,168 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||||
|
|
||||||
|
#include <Kokkos_Bitset.hpp>
|
||||||
|
#include <Kokkos_UnorderedMap.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_Vector.hpp>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
#include <TestBitset.hpp>
|
||||||
|
#include <TestUnorderedMap.hpp>
|
||||||
|
#include <TestStaticCrsGraph.hpp>
|
||||||
|
|
||||||
|
#include <TestVector.hpp>
|
||||||
|
#include <TestDualView.hpp>
|
||||||
|
#include <TestSegmentedView.hpp>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
class threads : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
std::cout << std::setprecision(5) << std::scientific;
|
||||||
|
|
||||||
|
unsigned num_threads = 4;
|
||||||
|
|
||||||
|
if (Kokkos::hwloc::available()) {
|
||||||
|
num_threads = Kokkos::hwloc::get_available_numa_count()
|
||||||
|
* Kokkos::hwloc::get_available_cores_per_numa()
|
||||||
|
// * Kokkos::hwloc::get_available_threads_per_core()
|
||||||
|
;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Threads: " << num_threads << std::endl;
|
||||||
|
|
||||||
|
Kokkos::Threads::initialize( num_threads );
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
Kokkos::Threads::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Runs both StaticCrsGraph routines (run_test_graph and run_test_graph2)
// on the Kokkos::Threads device.
TEST_F( threads , staticcrsgraph )
{
  typedef Kokkos::Threads device_type ;

  TestStaticCrsGraph::run_test_graph< device_type >();
  TestStaticCrsGraph::run_test_graph2< device_type >();
}
|
||||||
|
|
||||||
|
/*TEST_F( threads, bitset )
|
||||||
|
{
|
||||||
|
test_bitset<Kokkos::Threads>();
|
||||||
|
}*/
|
||||||
|
|
||||||
|
// Generates a `threads` fixture test named
//   UnorderedMap_insert_<name>_<num_nodes>_<num_inserts>_<num_duplicates>_<repeat>x
// whose body calls
//   test_insert<Kokkos::Threads>(num_nodes, num_inserts, num_duplicates, near)
// `repeat` times in a row.  `name` only contributes to the test name.
#define THREADS_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
  TEST_F( threads, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
    for (int pass = 0; pass < repeat; ++pass) { \
      test_insert<Kokkos::Threads>(num_nodes,num_inserts,num_duplicates, near); \
    } \
  }
|
||||||
|
|
||||||
|
// Generates a `threads` fixture test named
//   UnorderedMap_failed_insert_<num_nodes>_<repeat>x
// whose body calls test_failed_insert<Kokkos::Threads>(num_nodes) `repeat`
// times in a row.
#define THREADS_FAILED_INSERT_TEST( num_nodes, repeat ) \
  TEST_F( threads, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
    for (int pass = 0; pass < repeat; ++pass) { \
      test_failed_insert<Kokkos::Threads>(num_nodes); \
    } \
  }
|
||||||
|
|
||||||
|
// Generates a `threads` fixture test named
//   UnorderedMap_assignment_operators_<num_nodes>_<repeat>x
// whose body calls test_assignement_operators<Kokkos::Threads>(num_nodes)
// `repeat` times in a row.
// NOTE(review): this macro does not appear to be instantiated in this file,
// so test_assignement_operators is never actually referenced - confirm
// whether the helper still exists before using the macro.
#define THREADS_ASSIGNEMENT_TEST( num_nodes, repeat ) \
  TEST_F( threads, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
    for (int pass = 0; pass < repeat; ++pass) { \
      test_assignement_operators<Kokkos::Threads>(num_nodes); \
    } \
  }
|
||||||
|
|
||||||
|
// Generates a `threads` fixture test named
//   UnorderedMap_deep_copy<num_nodes>_<repeat>x
// (no underscore between "deep_copy" and the node count) whose body calls
// test_deep_copy<Kokkos::Threads>(num_nodes) `repeat` times in a row.
#define THREADS_DEEP_COPY( num_nodes, repeat ) \
  TEST_F( threads, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
    for (int pass = 0; pass < repeat; ++pass) { \
      test_deep_copy<Kokkos::Threads>(num_nodes); \
    } \
  }
|
||||||
|
|
||||||
|
// Generates a `threads` fixture test named vector_combination<size>x that
// calls test_vector_combinations<int,Kokkos::Threads>(size) once.
#define THREADS_VECTOR_COMBINE_TEST( size ) \
  TEST_F( threads, vector_combination##size##x) \
  { \
    test_vector_combinations<int,Kokkos::Threads>(size); \
  }
|
||||||
|
|
||||||
|
// Generates a `threads` fixture test named dualview_combination<size>x that
// calls test_dualview_combinations<int,Kokkos::Threads>(size) once.
#define THREADS_DUALVIEW_COMBINE_TEST( size ) \
  TEST_F( threads, dualview_combination##size##x) \
  { \
    test_dualview_combinations<int,Kokkos::Threads>(size); \
  }
|
||||||
|
|
||||||
|
// Generates a `threads` fixture test named segmentedview_<size>x that calls
// test_segmented_view<double,Kokkos::Threads>(size) once.
#define THREADS_SEGMENTEDVIEW_TEST( size ) \
  TEST_F( threads, segmentedview_##size##x) \
  { \
    test_segmented_view<double,Kokkos::Threads>(size); \
  }
|
||||||
|
|
||||||
|
|
||||||
|
// UnorderedMap tests.
// Arguments: ( name, num_nodes, num_inserts, num_duplicates, repeat, near )
THREADS_INSERT_TEST( far, 100000, 90000, 100, 500, false )
// Arguments: ( num_nodes, repeat )
THREADS_FAILED_INSERT_TEST( 10000, 1000 )
THREADS_DEEP_COPY( 10000, 1 )

// Vector, DualView and SegmentedView tests.  Argument: ( size )
THREADS_VECTOR_COMBINE_TEST( 10 )
THREADS_VECTOR_COMBINE_TEST( 3057 )
THREADS_DUALVIEW_COMBINE_TEST( 10 )
THREADS_SEGMENTEDVIEW_TEST( 10000 )

// All tests are instantiated; remove the generator macros again.
#undef THREADS_INSERT_TEST
#undef THREADS_FAILED_INSERT_TEST
#undef THREADS_ASSIGNEMENT_TEST
#undef THREADS_DEEP_COPY
#undef THREADS_VECTOR_COMBINE_TEST
#undef THREADS_DUALVIEW_COMBINE_TEST
#undef THREADS_SEGMENTEDVIEW_TEST
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||||
|
|
||||||
313
lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp
Executable file
313
lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp
Executable file
@ -0,0 +1,313 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_UNORDERED_MAP_HPP
|
||||||
|
#define KOKKOS_TEST_UNORDERED_MAP_HPP
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template <typename MapType, bool Near = false>
|
||||||
|
struct TestInsert
|
||||||
|
{
|
||||||
|
typedef MapType map_type;
|
||||||
|
typedef typename map_type::execution_space execution_space;
|
||||||
|
typedef uint32_t value_type;
|
||||||
|
|
||||||
|
map_type map;
|
||||||
|
uint32_t inserts;
|
||||||
|
uint32_t collisions;
|
||||||
|
|
||||||
|
TestInsert( map_type arg_map, uint32_t arg_inserts, uint32_t arg_collisions)
|
||||||
|
: map(arg_map)
|
||||||
|
, inserts(arg_inserts)
|
||||||
|
, collisions(arg_collisions)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void testit( bool rehash_on_fail = true )
|
||||||
|
{
|
||||||
|
execution_space::fence();
|
||||||
|
|
||||||
|
uint32_t failed_count = 0;
|
||||||
|
do {
|
||||||
|
failed_count = 0;
|
||||||
|
Kokkos::parallel_reduce(inserts, *this, failed_count);
|
||||||
|
|
||||||
|
if (rehash_on_fail && failed_count > 0u) {
|
||||||
|
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + failed_count/collisions ;
|
||||||
|
map.rehash( new_capacity );
|
||||||
|
}
|
||||||
|
} while (rehash_on_fail && failed_count > 0u);
|
||||||
|
|
||||||
|
execution_space::fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void init( value_type & failed_count ) const { failed_count = 0; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void join( volatile value_type & failed_count, const volatile value_type & count ) const
|
||||||
|
{ failed_count += count; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(uint32_t i, value_type & failed_count) const
|
||||||
|
{
|
||||||
|
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
|
||||||
|
if (map.insert(key,i).failed()) ++failed_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename MapType, bool Near>
|
||||||
|
struct TestErase
|
||||||
|
{
|
||||||
|
typedef TestErase<MapType, Near> self_type;
|
||||||
|
|
||||||
|
typedef MapType map_type;
|
||||||
|
typedef typename MapType::execution_space execution_space;
|
||||||
|
|
||||||
|
map_type m_map;
|
||||||
|
uint32_t m_num_erase;
|
||||||
|
uint32_t m_num_duplicates;
|
||||||
|
|
||||||
|
TestErase(map_type map, uint32_t num_erases, uint32_t num_duplicates)
|
||||||
|
: m_map(map)
|
||||||
|
, m_num_erase(num_erases)
|
||||||
|
, m_num_duplicates(num_duplicates)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void testit()
|
||||||
|
{
|
||||||
|
execution_space::fence();
|
||||||
|
Kokkos::parallel_for(m_num_erase, *this);
|
||||||
|
execution_space::fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(typename execution_space::size_type i) const
|
||||||
|
{
|
||||||
|
if (Near) {
|
||||||
|
m_map.erase(i/m_num_duplicates);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_map.erase(i%(m_num_erase/m_num_duplicates));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename MapType>
|
||||||
|
struct TestFind
|
||||||
|
{
|
||||||
|
typedef MapType map_type;
|
||||||
|
typedef typename MapType::execution_space::execution_space execution_space;
|
||||||
|
typedef uint32_t value_type;
|
||||||
|
|
||||||
|
map_type m_map;
|
||||||
|
uint32_t m_num_insert;
|
||||||
|
uint32_t m_num_duplicates;
|
||||||
|
uint32_t m_max_key;
|
||||||
|
|
||||||
|
TestFind(map_type map, uint32_t num_inserts, uint32_t num_duplicates)
|
||||||
|
: m_map(map)
|
||||||
|
, m_num_insert(num_inserts)
|
||||||
|
, m_num_duplicates(num_duplicates)
|
||||||
|
, m_max_key( ((num_inserts + num_duplicates) - 1)/num_duplicates )
|
||||||
|
{}
|
||||||
|
|
||||||
|
void testit(value_type &errors)
|
||||||
|
{
|
||||||
|
execution_space::execution_space::fence();
|
||||||
|
Kokkos::parallel_reduce(m_map.capacity(), *this, errors);
|
||||||
|
execution_space::execution_space::fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void init( value_type & dst)
|
||||||
|
{
|
||||||
|
dst = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void join( volatile value_type & dst, const volatile value_type & src)
|
||||||
|
{ dst += src; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(typename execution_space::size_type i, value_type & errors) const
|
||||||
|
{
|
||||||
|
const bool expect_to_find_i = (i < m_max_key);
|
||||||
|
|
||||||
|
const bool exists = m_map.exists(i);
|
||||||
|
|
||||||
|
if (expect_to_find_i && !exists) ++errors;
|
||||||
|
if (!expect_to_find_i && exists) ++errors;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
void test_insert( uint32_t num_nodes , uint32_t num_inserts , uint32_t num_duplicates , bool near )
|
||||||
|
{
|
||||||
|
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
|
||||||
|
typedef Kokkos::UnorderedMap<const uint32_t,const uint32_t, Device> const_map_type;
|
||||||
|
|
||||||
|
const uint32_t expected_inserts = (num_inserts + num_duplicates -1u) / num_duplicates;
|
||||||
|
|
||||||
|
map_type map;
|
||||||
|
map.rehash(num_nodes,false);
|
||||||
|
|
||||||
|
if (near) {
|
||||||
|
Impl::TestInsert<map_type,true> test_insert(map, num_inserts, num_duplicates);
|
||||||
|
test_insert.testit();
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
Impl::TestInsert<map_type,false> test_insert(map, num_inserts, num_duplicates);
|
||||||
|
test_insert.testit();
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool print_list = false;
|
||||||
|
if (print_list) {
|
||||||
|
Kokkos::Impl::UnorderedMapPrint<map_type> f(map);
|
||||||
|
f.apply();
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint32_t map_size = map.size();
|
||||||
|
|
||||||
|
ASSERT_FALSE( map.failed_insert());
|
||||||
|
{
|
||||||
|
EXPECT_EQ(expected_inserts, map_size);
|
||||||
|
|
||||||
|
{
|
||||||
|
uint32_t find_errors = 0;
|
||||||
|
Impl::TestFind<const_map_type> test_find(map, num_inserts, num_duplicates);
|
||||||
|
test_find.testit(find_errors);
|
||||||
|
EXPECT_EQ( 0u, find_errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
map.begin_erase();
|
||||||
|
Impl::TestErase<map_type,false> test_erase(map, num_inserts, num_duplicates);
|
||||||
|
test_erase.testit();
|
||||||
|
map.end_erase();
|
||||||
|
EXPECT_EQ(0u, map.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
void test_failed_insert( uint32_t num_nodes)
|
||||||
|
{
|
||||||
|
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
|
||||||
|
|
||||||
|
map_type map(num_nodes);
|
||||||
|
Impl::TestInsert<map_type> test_insert(map, 2u*num_nodes, 1u);
|
||||||
|
test_insert.testit(false /*don't rehash on fail*/);
|
||||||
|
Device::execution_space::fence();
|
||||||
|
|
||||||
|
EXPECT_TRUE( map.failed_insert() );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Device>
|
||||||
|
void test_deep_copy( uint32_t num_nodes )
|
||||||
|
{
|
||||||
|
typedef Kokkos::UnorderedMap<uint32_t,uint32_t, Device> map_type;
|
||||||
|
typedef Kokkos::UnorderedMap<const uint32_t, const uint32_t, Device> const_map_type;
|
||||||
|
|
||||||
|
typedef typename map_type::HostMirror host_map_type ;
|
||||||
|
// typedef Kokkos::UnorderedMap<uint32_t, uint32_t, typename Device::host_mirror_execution_space > host_map_type;
|
||||||
|
|
||||||
|
map_type map;
|
||||||
|
map.rehash(num_nodes,false);
|
||||||
|
|
||||||
|
{
|
||||||
|
Impl::TestInsert<map_type> test_insert(map, num_nodes, 1);
|
||||||
|
test_insert.testit();
|
||||||
|
ASSERT_EQ( map.size(), num_nodes);
|
||||||
|
ASSERT_FALSE( map.failed_insert() );
|
||||||
|
{
|
||||||
|
uint32_t find_errors = 0;
|
||||||
|
Impl::TestFind<map_type> test_find(map, num_nodes, 1);
|
||||||
|
test_find.testit(find_errors);
|
||||||
|
EXPECT_EQ( find_errors, 0u);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
host_map_type hmap;
|
||||||
|
Kokkos::deep_copy(hmap, map);
|
||||||
|
|
||||||
|
ASSERT_EQ( map.size(), hmap.size());
|
||||||
|
ASSERT_EQ( map.capacity(), hmap.capacity());
|
||||||
|
{
|
||||||
|
uint32_t find_errors = 0;
|
||||||
|
Impl::TestFind<host_map_type> test_find(hmap, num_nodes, 1);
|
||||||
|
test_find.testit(find_errors);
|
||||||
|
EXPECT_EQ( find_errors, 0u);
|
||||||
|
}
|
||||||
|
|
||||||
|
map_type mmap;
|
||||||
|
Kokkos::deep_copy(mmap, hmap);
|
||||||
|
|
||||||
|
const_map_type cmap = mmap;
|
||||||
|
|
||||||
|
EXPECT_EQ( cmap.size(), num_nodes);
|
||||||
|
|
||||||
|
{
|
||||||
|
uint32_t find_errors = 0;
|
||||||
|
Impl::TestFind<const_map_type> test_find(cmap, num_nodes, 1);
|
||||||
|
test_find.testit(find_errors);
|
||||||
|
EXPECT_EQ( find_errors, 0u);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif //KOKKOS_TEST_UNORDERED_MAP_HPP
|
||||||
131
lib/kokkos/containers/unit_tests/TestVector.hpp
Executable file
131
lib/kokkos/containers/unit_tests/TestVector.hpp
Executable file
@ -0,0 +1,131 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TEST_VECTOR_HPP
|
||||||
|
#define KOKKOS_TEST_VECTOR_HPP
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template <typename Scalar, class Device>
|
||||||
|
struct test_vector_combinations
|
||||||
|
{
|
||||||
|
typedef test_vector_combinations<Scalar,Device> self_type;
|
||||||
|
|
||||||
|
typedef Scalar scalar_type;
|
||||||
|
typedef Device execution_space;
|
||||||
|
|
||||||
|
Scalar reference;
|
||||||
|
Scalar result;
|
||||||
|
|
||||||
|
template <typename Vector>
|
||||||
|
Scalar run_me(unsigned int n){
|
||||||
|
Vector a(n,1);
|
||||||
|
|
||||||
|
|
||||||
|
a.push_back(2);
|
||||||
|
a.resize(n+4);
|
||||||
|
a[n+1] = 3;
|
||||||
|
a[n+2] = 4;
|
||||||
|
a[n+3] = 5;
|
||||||
|
|
||||||
|
|
||||||
|
Scalar temp1 = a[2];
|
||||||
|
Scalar temp2 = a[n];
|
||||||
|
Scalar temp3 = a[n+1];
|
||||||
|
|
||||||
|
a.assign(n+2,-1);
|
||||||
|
|
||||||
|
a[2] = temp1;
|
||||||
|
a[n] = temp2;
|
||||||
|
a[n+1] = temp3;
|
||||||
|
|
||||||
|
Scalar test1 = 0;
|
||||||
|
for(unsigned int i=0; i<a.size(); i++)
|
||||||
|
test1+=a[i];
|
||||||
|
|
||||||
|
a.assign(n+1,-2);
|
||||||
|
Scalar test2 = 0;
|
||||||
|
for(unsigned int i=0; i<a.size(); i++)
|
||||||
|
test2+=a[i];
|
||||||
|
|
||||||
|
a.reserve(n+10);
|
||||||
|
|
||||||
|
Scalar test3 = 0;
|
||||||
|
for(unsigned int i=0; i<a.size(); i++)
|
||||||
|
test3+=a[i];
|
||||||
|
|
||||||
|
|
||||||
|
return (test1*test2+test3)*test2+test1*test3;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
test_vector_combinations(unsigned int size)
|
||||||
|
{
|
||||||
|
reference = run_me<std::vector<Scalar> >(size);
|
||||||
|
result = run_me<Kokkos::vector<Scalar,Device> >(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Scalar, typename Device>
|
||||||
|
void test_vector_combinations(unsigned int size)
|
||||||
|
{
|
||||||
|
Impl::test_vector_combinations<Scalar,Device> test(size);
|
||||||
|
ASSERT_EQ( test.reference, test.result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif //KOKKOS_TEST_VECTOR_HPP
|
||||||
50
lib/kokkos/containers/unit_tests/UnitTestMain.cpp
Executable file
50
lib/kokkos/containers/unit_tests/UnitTestMain.cpp
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
::testing::InitGoogleTest(&argc,argv);
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
||||||
|
|
||||||
66
lib/kokkos/core/perf_test/Makefile
Executable file
66
lib/kokkos/core/perf_test/Makefile
Executable file
@ -0,0 +1,66 @@
|
|||||||
|
# Builds and runs the Kokkos core performance tests.
#
# Paths are relative to this directory.
KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

vpath %.cpp ${KOKKOS_PATH}/core/perf_test

# Must stay the first rule so that a bare `make` builds everything.
default: build_all
	echo "End Build"

# These targets do not produce a file of the same name; declaring them phony
# keeps a stray file called e.g. `test` or `clean` from masking the rule.
.PHONY: default build_all test test-performance test-atomic clean

include $(KOKKOS_PATH)/Makefile.kokkos

# With CUDA enabled the compiler and linker are forced ('='); otherwise only
# defaults are supplied ('?=') so that the caller may override them.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = nvcc_wrapper
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test

# Executables to build (TARGETS) and the rules that run them (TEST_TARGETS).
TEST_TARGETS =
TARGETS =

OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o
TARGETS += KokkosCore_PerformanceTest
TEST_TARGETS += test-performance

OBJ_ATOMICS = test_atomic.o
TARGETS += KokkosCore_PerformanceTest_Atomics
TEST_TARGETS += test-atomic

# Link rules

KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest

KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics

# Run rules

test-performance: KokkosCore_PerformanceTest
	./KokkosCore_PerformanceTest

test-atomic: KokkosCore_PerformanceTest_Atomics
	./KokkosCore_PerformanceTest_Atomics

# Aggregate targets

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
||||||
|
|
||||||
309
lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp
Executable file
309
lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp
Executable file
@ -0,0 +1,309 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_BLAS_KERNELS_HPP
|
||||||
|
#define KOKKOS_BLAS_KERNELS_HPP
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template< class ConstVectorType ,
|
||||||
|
class Device = typename ConstVectorType::execution_space >
|
||||||
|
struct Dot ;
|
||||||
|
|
||||||
|
template< class ConstVectorType ,
|
||||||
|
class Device = typename ConstVectorType::execution_space >
|
||||||
|
struct DotSingle ;
|
||||||
|
|
||||||
|
template< class ConstScalarType ,
|
||||||
|
class VectorType ,
|
||||||
|
class Device = typename VectorType::execution_space >
|
||||||
|
struct Scale ;
|
||||||
|
|
||||||
|
template< class ConstScalarType ,
|
||||||
|
class ConstVectorType ,
|
||||||
|
class VectorType ,
|
||||||
|
class Device = typename VectorType::execution_space >
|
||||||
|
struct AXPBY ;
|
||||||
|
|
||||||
|
/** \brief Y = alpha * X + beta * Y */
|
||||||
|
template< class ConstScalarType ,
|
||||||
|
class ConstVectorType ,
|
||||||
|
class VectorType >
|
||||||
|
void axpby( const ConstScalarType & alpha ,
|
||||||
|
const ConstVectorType & X ,
|
||||||
|
const ConstScalarType & beta ,
|
||||||
|
const VectorType & Y )
|
||||||
|
{
|
||||||
|
typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;
|
||||||
|
|
||||||
|
parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Y *= alpha */
|
||||||
|
template< class ConstScalarType ,
|
||||||
|
class VectorType >
|
||||||
|
void scale( const ConstScalarType & alpha , const VectorType & Y )
|
||||||
|
{
|
||||||
|
typedef Scale< ConstScalarType , VectorType > functor ;
|
||||||
|
|
||||||
|
parallel_for( Y.dimension_0() , functor( alpha , Y ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class ConstVectorType ,
|
||||||
|
class Finalize >
|
||||||
|
void dot( const ConstVectorType & X ,
|
||||||
|
const ConstVectorType & Y ,
|
||||||
|
const Finalize & finalize )
|
||||||
|
{
|
||||||
|
typedef Dot< ConstVectorType > functor ;
|
||||||
|
|
||||||
|
parallel_reduce( X.dimension_0() , functor( X , Y ) , finalize );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class ConstVectorType ,
|
||||||
|
class Finalize >
|
||||||
|
void dot( const ConstVectorType & X ,
|
||||||
|
const Finalize & finalize )
|
||||||
|
{
|
||||||
|
typedef DotSingle< ConstVectorType > functor ;
|
||||||
|
|
||||||
|
parallel_reduce( X.dimension_0() , functor( X ) , finalize );
|
||||||
|
}
|
||||||
|
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template< class Type , class Device >
|
||||||
|
struct Dot
|
||||||
|
{
|
||||||
|
typedef typename Device::execution_space execution_space ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||||
|
Impl::unsigned_< Type::Rank > >::type ok_rank ;
|
||||||
|
|
||||||
|
|
||||||
|
/* typedef typename
|
||||||
|
Impl::StaticAssertSame< execution_space ,
|
||||||
|
typename Type::execution_space >::type ok_device ;*/
|
||||||
|
|
||||||
|
typedef double value_type ;
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
typename Type::const_type X ;
|
||||||
|
typename Type::const_type Y ;
|
||||||
|
#else
|
||||||
|
Type X ;
|
||||||
|
Type Y ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Dot( const Type & arg_x , const Type & arg_y )
|
||||||
|
: X(arg_x) , Y(arg_y) { }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( int i , value_type & update ) const
|
||||||
|
{ update += X[i] * Y[i]; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void join( volatile value_type & update ,
|
||||||
|
const volatile value_type & source )
|
||||||
|
{ update += source; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void init( value_type & update )
|
||||||
|
{ update = 0 ; }
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class Type , class Device >
|
||||||
|
struct DotSingle
|
||||||
|
{
|
||||||
|
typedef typename Device::execution_space execution_space ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||||
|
Impl::unsigned_< Type::Rank > >::type ok_rank ;
|
||||||
|
|
||||||
|
/* typedef typename
|
||||||
|
Impl::StaticAssertSame< execution_space ,
|
||||||
|
typename Type::execution_space >::type ok_device ;*/
|
||||||
|
|
||||||
|
typedef double value_type ;
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
typename Type::const_type X ;
|
||||||
|
#else
|
||||||
|
Type X ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
DotSingle( const Type & arg_x ) : X(arg_x) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( int i , value_type & update ) const
|
||||||
|
{
|
||||||
|
const typename Type::value_type & x = X[i]; update += x * x ;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void join( volatile value_type & update ,
|
||||||
|
const volatile value_type & source )
|
||||||
|
{ update += source; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void init( value_type & update )
|
||||||
|
{ update = 0 ; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template< class ScalarType , class VectorType , class Device>
|
||||||
|
struct Scale
|
||||||
|
{
|
||||||
|
typedef typename Device::execution_space execution_space ;
|
||||||
|
|
||||||
|
/* typedef typename
|
||||||
|
Impl::StaticAssertSame< execution_space ,
|
||||||
|
typename ScalarType::execution_space >::type
|
||||||
|
ok_scalar_device ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< execution_space ,
|
||||||
|
typename VectorType::execution_space >::type
|
||||||
|
ok_vector_device ;*/
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
|
||||||
|
Impl::unsigned_< ScalarType::Rank > >::type
|
||||||
|
ok_scalar_rank ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||||
|
Impl::unsigned_< VectorType::Rank > >::type
|
||||||
|
ok_vector_rank ;
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
typename ScalarType::const_type alpha ;
|
||||||
|
#else
|
||||||
|
ScalarType alpha ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
VectorType Y ;
|
||||||
|
|
||||||
|
Scale( const ScalarType & arg_alpha , const VectorType & arg_Y )
|
||||||
|
: alpha( arg_alpha ), Y( arg_Y ) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( int i ) const
|
||||||
|
{
|
||||||
|
Y[i] *= alpha() ;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template< class ScalarType ,
|
||||||
|
class ConstVectorType ,
|
||||||
|
class VectorType,
|
||||||
|
class Device>
|
||||||
|
struct AXPBY
|
||||||
|
{
|
||||||
|
typedef typename Device::execution_space execution_space ;
|
||||||
|
|
||||||
|
/* typedef typename
|
||||||
|
Impl::StaticAssertSame< execution_space ,
|
||||||
|
typename ScalarType::execution_space >::type
|
||||||
|
ok_scalar_device ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< execution_space ,
|
||||||
|
typename ConstVectorType::execution_space >::type
|
||||||
|
ok_const_vector_device ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< execution_space ,
|
||||||
|
typename VectorType::execution_space >::type
|
||||||
|
ok_vector_device ;*/
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< Impl::unsigned_< 0 > ,
|
||||||
|
Impl::unsigned_< ScalarType::Rank > >::type
|
||||||
|
ok_scalar_rank ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||||
|
Impl::unsigned_< ConstVectorType::Rank > >::type
|
||||||
|
ok_const_vector_rank ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::StaticAssertSame< Impl::unsigned_< 1 > ,
|
||||||
|
Impl::unsigned_< VectorType::Rank > >::type
|
||||||
|
ok_vector_rank ;
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
typename ScalarType::const_type alpha , beta ;
|
||||||
|
typename ConstVectorType::const_type X ;
|
||||||
|
#else
|
||||||
|
ScalarType alpha , beta ;
|
||||||
|
ConstVectorType X ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
VectorType Y ;
|
||||||
|
|
||||||
|
AXPBY( const ScalarType & arg_alpha ,
|
||||||
|
const ConstVectorType & arg_X ,
|
||||||
|
const ScalarType & arg_beta ,
|
||||||
|
const VectorType & arg_Y )
|
||||||
|
: alpha( arg_alpha ), beta( arg_beta ), X( arg_X ), Y( arg_Y ) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( int i ) const
|
||||||
|
{
|
||||||
|
Y[i] = alpha() * X[i] + beta() * Y[i] ;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_BLAS_KERNELS_HPP */
|
||||||
189
lib/kokkos/core/perf_test/PerfTestCuda.cpp
Executable file
189
lib/kokkos/core/perf_test/PerfTestCuda.cpp
Executable file
@ -0,0 +1,189 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
#include <PerfTestHexGrad.hpp>
|
||||||
|
#include <PerfTestBlasKernels.hpp>
|
||||||
|
#include <PerfTestGramSchmidt.hpp>
|
||||||
|
#include <PerfTestDriver.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
class cuda : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase() {
|
||||||
|
Kokkos::HostSpace::execution_space::initialize();
|
||||||
|
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||||
|
}
|
||||||
|
static void TearDownTestCase() {
|
||||||
|
Kokkos::Cuda::finalize();
|
||||||
|
Kokkos::HostSpace::execution_space::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Calls run_test_hexgrad on Kokkos::Cuda with exponent arguments 10 and 20;
// the only expectation is that the call does not throw.
TEST_F( cuda, hexgrad ) {
  EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}
|
||||||
|
|
||||||
|
// Calls run_test_gramschmidt on Kokkos::Cuda with exponent arguments 10 and
// 20; the only expectation is that the call does not throw.
TEST_F( cuda, gramschmidt ) {
  EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct TextureFetch
|
||||||
|
{
|
||||||
|
typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type;
|
||||||
|
typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type;
|
||||||
|
typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type;
|
||||||
|
typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type;
|
||||||
|
|
||||||
|
struct FillArray
|
||||||
|
{
|
||||||
|
array_type m_array;
|
||||||
|
FillArray( const array_type & array )
|
||||||
|
: m_array(array)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void apply() const
|
||||||
|
{
|
||||||
|
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.size()), *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const { m_array(i) = i; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct RandomIndexes
|
||||||
|
{
|
||||||
|
index_array_type m_indexes;
|
||||||
|
typename index_array_type::HostMirror m_host_indexes;
|
||||||
|
RandomIndexes( const index_array_type & indexes)
|
||||||
|
: m_indexes(indexes)
|
||||||
|
, m_host_indexes(Kokkos::create_mirror(m_indexes))
|
||||||
|
{}
|
||||||
|
|
||||||
|
void apply() const
|
||||||
|
{
|
||||||
|
Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.size()), *this);
|
||||||
|
//random shuffle
|
||||||
|
Kokkos::HostSpace::execution_space::fence();
|
||||||
|
std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.size());
|
||||||
|
Kokkos::deep_copy(m_indexes,m_host_indexes);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const { m_host_indexes(i) = i; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct RandomReduce
|
||||||
|
{
|
||||||
|
const_array_type m_array;
|
||||||
|
const_index_array_type m_indexes;
|
||||||
|
RandomReduce( const const_array_type & array, const const_index_array_type & indexes)
|
||||||
|
: m_array(array)
|
||||||
|
, m_indexes(indexes)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void apply(T & reduce) const
|
||||||
|
{
|
||||||
|
Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.size()), *this, reduce);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i, T & reduce) const
|
||||||
|
{ reduce += m_array(m_indexes(i)); }
|
||||||
|
};
|
||||||
|
|
||||||
|
static void run(int size, double & reduce_time, T &reduce)
|
||||||
|
{
|
||||||
|
array_type array("array",size);
|
||||||
|
index_array_type indexes("indexes",size);
|
||||||
|
|
||||||
|
{ FillArray f(array); f.apply(); }
|
||||||
|
{ RandomIndexes f(indexes); f.apply(); }
|
||||||
|
|
||||||
|
Kokkos::Cuda::fence();
|
||||||
|
|
||||||
|
Kokkos::Impl::Timer timer;
|
||||||
|
for (int j=0; j<10; ++j) {
|
||||||
|
RandomReduce f(array,indexes);
|
||||||
|
f.apply(reduce);
|
||||||
|
}
|
||||||
|
Kokkos::Cuda::fence();
|
||||||
|
reduce_time = timer.seconds();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // unnamed namespace
|
||||||
|
|
||||||
|
// Runs TextureFetch<double>::run for array sizes 2^1 through 2^27 and prints
// the measured time for each size.
TEST_F( cuda, texture_double )
{
  printf("Random reduce of double through texture fetch\n");

  for (int exponent=1; exponent<=27; ++exponent) {
    const int size = 1<<exponent;

    double elapsed_seconds = 0;
    double total = 0;
    TextureFetch<double>::run(size,elapsed_seconds,total);

    printf(" time = %1.3e size = 2^%d\n", elapsed_seconds, exponent);
  }
}
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||||
|
|
||||||
152
lib/kokkos/core/perf_test/PerfTestDriver.hpp
Executable file
152
lib/kokkos/core/perf_test/PerfTestDriver.hpp
Executable file
@ -0,0 +1,152 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// mfh 06 Jun 2013: This macro doesn't work like one might think it
|
||||||
|
// should. It doesn't take the template parameter DeviceType and
|
||||||
|
// print its actual type name; it just literally prints out
|
||||||
|
// "DeviceType". I've worked around this below without using the
|
||||||
|
// macro, so I'm commenting out the macro to avoid compiler complaints
|
||||||
|
// about an unused macro.
|
||||||
|
|
||||||
|
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
|
||||||
|
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
enum { NUMBER_OF_TRIALS = 5 };
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template< class DeviceType >
|
||||||
|
void run_test_hexgrad( int exp_beg , int exp_end, const char deviceTypeName[] )
|
||||||
|
{
|
||||||
|
std::string label_hexgrad ;
|
||||||
|
label_hexgrad.append( "\"HexGrad< double , " );
|
||||||
|
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
|
||||||
|
// the string, not the actual name of the device type. Thus, I've
|
||||||
|
// modified the function to take the name of the device type.
|
||||||
|
//
|
||||||
|
//label_hexgrad.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
|
||||||
|
label_hexgrad.append( deviceTypeName );
|
||||||
|
label_hexgrad.append( " >\"" );
|
||||||
|
|
||||||
|
for (int i = exp_beg ; i < exp_end ; ++i) {
|
||||||
|
double min_seconds = 0.0 ;
|
||||||
|
double max_seconds = 0.0 ;
|
||||||
|
double avg_seconds = 0.0 ;
|
||||||
|
|
||||||
|
const int parallel_work_length = 1<<i;
|
||||||
|
|
||||||
|
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
|
||||||
|
const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ;
|
||||||
|
|
||||||
|
if ( 0 == j ) {
|
||||||
|
min_seconds = seconds ;
|
||||||
|
max_seconds = seconds ;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if ( seconds < min_seconds ) min_seconds = seconds ;
|
||||||
|
if ( seconds > max_seconds ) max_seconds = seconds ;
|
||||||
|
}
|
||||||
|
avg_seconds += seconds ;
|
||||||
|
}
|
||||||
|
avg_seconds /= NUMBER_OF_TRIALS ;
|
||||||
|
|
||||||
|
std::cout << label_hexgrad
|
||||||
|
<< " , " << parallel_work_length
|
||||||
|
<< " , " << min_seconds
|
||||||
|
<< " , " << ( min_seconds / parallel_work_length )
|
||||||
|
<< std::endl ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class DeviceType >
|
||||||
|
void run_test_gramschmidt( int exp_beg , int exp_end, const char deviceTypeName[] )
|
||||||
|
{
|
||||||
|
std::string label_gramschmidt ;
|
||||||
|
label_gramschmidt.append( "\"GramSchmidt< double , " );
|
||||||
|
// mfh 06 Jun 2013: This only appends "DeviceType" (literally) to
|
||||||
|
// the string, not the actual name of the device type. Thus, I've
|
||||||
|
// modified the function to take the name of the device type.
|
||||||
|
//
|
||||||
|
//label_gramschmidt.append( KOKKOS_MACRO_TO_STRING( DeviceType ) );
|
||||||
|
label_gramschmidt.append( deviceTypeName );
|
||||||
|
label_gramschmidt.append( " >\"" );
|
||||||
|
|
||||||
|
for (int i = exp_beg ; i < exp_end ; ++i) {
|
||||||
|
double min_seconds = 0.0 ;
|
||||||
|
double max_seconds = 0.0 ;
|
||||||
|
double avg_seconds = 0.0 ;
|
||||||
|
|
||||||
|
const int parallel_work_length = 1<<i;
|
||||||
|
|
||||||
|
for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) {
|
||||||
|
const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ;
|
||||||
|
|
||||||
|
if ( 0 == j ) {
|
||||||
|
min_seconds = seconds ;
|
||||||
|
max_seconds = seconds ;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if ( seconds < min_seconds ) min_seconds = seconds ;
|
||||||
|
if ( seconds > max_seconds ) max_seconds = seconds ;
|
||||||
|
}
|
||||||
|
avg_seconds += seconds ;
|
||||||
|
}
|
||||||
|
avg_seconds /= NUMBER_OF_TRIALS ;
|
||||||
|
|
||||||
|
std::cout << label_gramschmidt
|
||||||
|
<< " , " << parallel_work_length
|
||||||
|
<< " , " << min_seconds
|
||||||
|
<< " , " << ( min_seconds / parallel_work_length )
|
||||||
|
<< std::endl ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
231
lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp
Executable file
231
lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp
Executable file
@ -0,0 +1,231 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <PerfTestBlasKernels.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
// Reduction : result = dot( Q(:,j) , Q(:,j) );
|
||||||
|
// PostProcess : R(j,j) = result ; inv = 1 / result ;
|
||||||
|
template< class VectorView , class ValueView >
|
||||||
|
struct InvNorm2 : public Kokkos::DotSingle< VectorView > {
|
||||||
|
|
||||||
|
typedef typename Kokkos::DotSingle< VectorView >::value_type value_type ;
|
||||||
|
|
||||||
|
ValueView Rjj ;
|
||||||
|
ValueView inv ;
|
||||||
|
|
||||||
|
InvNorm2( const VectorView & argX ,
|
||||||
|
const ValueView & argR ,
|
||||||
|
const ValueView & argInv )
|
||||||
|
: Kokkos::DotSingle< VectorView >( argX )
|
||||||
|
, Rjj( argR )
|
||||||
|
, inv( argInv )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void final( value_type & result ) const
|
||||||
|
{
|
||||||
|
result = sqrt( result );
|
||||||
|
Rjj() = result ;
|
||||||
|
inv() = ( 0 < result ) ? 1.0 / result : 0 ;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class VectorView , class ValueView >
|
||||||
|
inline
|
||||||
|
void invnorm2( const VectorView & x ,
|
||||||
|
const ValueView & r ,
|
||||||
|
const ValueView & r_inv )
|
||||||
|
{
|
||||||
|
Kokkos::parallel_reduce( x.dimension_0() , InvNorm2< VectorView , ValueView >( x , r , r_inv ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
// PostProcess : tmp = - ( R(j,k) = result );
|
||||||
|
template< class VectorView , class ValueView >
|
||||||
|
struct DotM : public Kokkos::Dot< VectorView > {
|
||||||
|
|
||||||
|
typedef typename Kokkos::Dot< VectorView >::value_type value_type ;
|
||||||
|
|
||||||
|
ValueView Rjk ;
|
||||||
|
ValueView tmp ;
|
||||||
|
|
||||||
|
DotM( const VectorView & argX ,
|
||||||
|
const VectorView & argY ,
|
||||||
|
const ValueView & argR ,
|
||||||
|
const ValueView & argTmp )
|
||||||
|
: Kokkos::Dot< VectorView >( argX , argY )
|
||||||
|
, Rjk( argR )
|
||||||
|
, tmp( argTmp )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void final( value_type & result ) const
|
||||||
|
{
|
||||||
|
Rjk() = result ;
|
||||||
|
tmp() = - result ;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class VectorView , class ValueView >
|
||||||
|
inline
|
||||||
|
void dot_neg( const VectorView & x ,
|
||||||
|
const VectorView & y ,
|
||||||
|
const ValueView & r ,
|
||||||
|
const ValueView & r_neg )
|
||||||
|
{
|
||||||
|
Kokkos::parallel_reduce( x.dimension_0() , DotM< VectorView , ValueView >( x , y , r , r_neg ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template< typename Scalar , class DeviceType >
|
||||||
|
struct ModifiedGramSchmidt
|
||||||
|
{
|
||||||
|
typedef DeviceType execution_space ;
|
||||||
|
typedef typename execution_space::size_type size_type ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< Scalar** ,
|
||||||
|
Kokkos::LayoutLeft ,
|
||||||
|
execution_space > multivector_type ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< Scalar* ,
|
||||||
|
Kokkos::LayoutLeft ,
|
||||||
|
execution_space > vector_type ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< Scalar ,
|
||||||
|
Kokkos::LayoutLeft ,
|
||||||
|
execution_space > value_view ;
|
||||||
|
|
||||||
|
|
||||||
|
multivector_type Q ;
|
||||||
|
multivector_type R ;
|
||||||
|
|
||||||
|
static double factorization( const multivector_type Q_ ,
|
||||||
|
const multivector_type R_ )
|
||||||
|
{
|
||||||
|
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||||
|
using Kokkos::Experimental::ALL ;
|
||||||
|
#else
|
||||||
|
const Kokkos::ALL ALL ;
|
||||||
|
#endif
|
||||||
|
const size_type count = Q_.dimension_1();
|
||||||
|
value_view tmp("tmp");
|
||||||
|
value_view one("one");
|
||||||
|
|
||||||
|
Kokkos::deep_copy( one , (Scalar) 1 );
|
||||||
|
|
||||||
|
Kokkos::Impl::Timer timer ;
|
||||||
|
|
||||||
|
for ( size_type j = 0 ; j < count ; ++j ) {
|
||||||
|
// Reduction : tmp = dot( Q(:,j) , Q(:,j) );
|
||||||
|
// PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ;
|
||||||
|
const vector_type Qj = Kokkos::subview( Q_ , ALL , j );
|
||||||
|
const value_view Rjj = Kokkos::subview( R_ , j , j );
|
||||||
|
|
||||||
|
invnorm2( Qj , Rjj , tmp );
|
||||||
|
|
||||||
|
// Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ;
|
||||||
|
Kokkos::scale( tmp , Qj );
|
||||||
|
|
||||||
|
for ( size_t k = j + 1 ; k < count ; ++k ) {
|
||||||
|
const vector_type Qk = Kokkos::subview( Q_ , ALL , k );
|
||||||
|
const value_view Rjk = Kokkos::subview( R_ , j , k );
|
||||||
|
|
||||||
|
// Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) );
|
||||||
|
// PostProcess : tmp = - R(j,k);
|
||||||
|
dot_neg( Qj , Qk , Rjk , tmp );
|
||||||
|
|
||||||
|
// Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j)
|
||||||
|
Kokkos::axpby( tmp , Qj , one , Qk );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
execution_space::fence();
|
||||||
|
|
||||||
|
return timer.seconds();
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
static double test( const size_t length ,
|
||||||
|
const size_t count ,
|
||||||
|
const size_t iter = 1 )
|
||||||
|
{
|
||||||
|
multivector_type Q_( "Q" , length , count );
|
||||||
|
multivector_type R_( "R" , count , count );
|
||||||
|
|
||||||
|
typename multivector_type::HostMirror A =
|
||||||
|
Kokkos::create_mirror( Q_ );
|
||||||
|
|
||||||
|
// Create and fill A on the host
|
||||||
|
|
||||||
|
for ( size_type j = 0 ; j < count ; ++j ) {
|
||||||
|
for ( size_type i = 0 ; i < length ; ++i ) {
|
||||||
|
A(i,j) = ( i + 1 ) * ( j + 1 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double dt_min = 0 ;
|
||||||
|
|
||||||
|
for ( size_t i = 0 ; i < iter ; ++i ) {
|
||||||
|
|
||||||
|
Kokkos::deep_copy( Q_ , A );
|
||||||
|
|
||||||
|
// A = Q * R
|
||||||
|
|
||||||
|
const double dt = factorization( Q_ , R_ );
|
||||||
|
|
||||||
|
if ( 0 == i ) dt_min = dt ;
|
||||||
|
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dt_min ;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
268
lib/kokkos/core/perf_test/PerfTestHexGrad.hpp
Executable file
268
lib/kokkos/core/perf_test/PerfTestHexGrad.hpp
Executable file
@ -0,0 +1,268 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
template< class DeviceType ,
|
||||||
|
typename CoordScalarType = double ,
|
||||||
|
typename GradScalarType = float >
|
||||||
|
struct HexGrad
|
||||||
|
{
|
||||||
|
typedef DeviceType execution_space ;
|
||||||
|
typedef typename execution_space::size_type size_type ;
|
||||||
|
|
||||||
|
typedef HexGrad<DeviceType,CoordScalarType,GradScalarType> self_type;
|
||||||
|
|
||||||
|
// 3D array : ( ParallelWork , Space , Node )
|
||||||
|
|
||||||
|
enum { NSpace = 3 , NNode = 8 };
|
||||||
|
|
||||||
|
typedef Kokkos::View< CoordScalarType*[NSpace][NNode] , execution_space >
|
||||||
|
elem_coord_type ;
|
||||||
|
|
||||||
|
typedef Kokkos::View< GradScalarType*[NSpace][NNode] , execution_space >
|
||||||
|
elem_grad_type ;
|
||||||
|
|
||||||
|
elem_coord_type coords ;
|
||||||
|
elem_grad_type grad_op ;
|
||||||
|
|
||||||
|
enum { FLOPS = 318 }; // = 3 * ( 18 + 8 * 11 ) };
|
||||||
|
enum { READS = 18 };
|
||||||
|
enum { WRITES = 18 };
|
||||||
|
|
||||||
|
HexGrad( const elem_coord_type & arg_coords ,
|
||||||
|
const elem_grad_type & arg_grad_op )
|
||||||
|
: coords( arg_coords )
|
||||||
|
, grad_op( arg_grad_op )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static
|
||||||
|
void grad( const CoordScalarType x[] ,
|
||||||
|
const CoordScalarType z[] ,
|
||||||
|
GradScalarType grad_y[] )
|
||||||
|
{
|
||||||
|
const GradScalarType R42=(x[3] - x[1]);
|
||||||
|
const GradScalarType R52=(x[4] - x[1]);
|
||||||
|
const GradScalarType R54=(x[4] - x[3]);
|
||||||
|
|
||||||
|
const GradScalarType R63=(x[5] - x[2]);
|
||||||
|
const GradScalarType R83=(x[7] - x[2]);
|
||||||
|
const GradScalarType R86=(x[7] - x[5]);
|
||||||
|
|
||||||
|
const GradScalarType R31=(x[2] - x[0]);
|
||||||
|
const GradScalarType R61=(x[5] - x[0]);
|
||||||
|
const GradScalarType R74=(x[6] - x[3]);
|
||||||
|
|
||||||
|
const GradScalarType R72=(x[6] - x[1]);
|
||||||
|
const GradScalarType R75=(x[6] - x[4]);
|
||||||
|
const GradScalarType R81=(x[7] - x[0]);
|
||||||
|
|
||||||
|
const GradScalarType t1=(R63 + R54);
|
||||||
|
const GradScalarType t2=(R61 + R74);
|
||||||
|
const GradScalarType t3=(R72 + R81);
|
||||||
|
|
||||||
|
const GradScalarType t4 =(R86 + R42);
|
||||||
|
const GradScalarType t5 =(R83 + R52);
|
||||||
|
const GradScalarType t6 =(R75 + R31);
|
||||||
|
|
||||||
|
// Calculate Y gradient from X and Z data
|
||||||
|
|
||||||
|
grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
|
||||||
|
grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
|
||||||
|
grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
|
||||||
|
grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
|
||||||
|
grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
|
||||||
|
grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
|
||||||
|
grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
|
||||||
|
grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( size_type ielem ) const
|
||||||
|
{
|
||||||
|
GradScalarType g[NNode] ;
|
||||||
|
|
||||||
|
const CoordScalarType x[NNode] = {
|
||||||
|
coords(ielem,0,0),
|
||||||
|
coords(ielem,0,1),
|
||||||
|
coords(ielem,0,2),
|
||||||
|
coords(ielem,0,3),
|
||||||
|
coords(ielem,0,4),
|
||||||
|
coords(ielem,0,5),
|
||||||
|
coords(ielem,0,6),
|
||||||
|
coords(ielem,0,7)
|
||||||
|
};
|
||||||
|
|
||||||
|
const CoordScalarType y[NNode] = {
|
||||||
|
coords(ielem,1,0),
|
||||||
|
coords(ielem,1,1),
|
||||||
|
coords(ielem,1,2),
|
||||||
|
coords(ielem,1,3),
|
||||||
|
coords(ielem,1,4),
|
||||||
|
coords(ielem,1,5),
|
||||||
|
coords(ielem,1,6),
|
||||||
|
coords(ielem,1,7)
|
||||||
|
};
|
||||||
|
|
||||||
|
const CoordScalarType z[NNode] = {
|
||||||
|
coords(ielem,2,0),
|
||||||
|
coords(ielem,2,1),
|
||||||
|
coords(ielem,2,2),
|
||||||
|
coords(ielem,2,3),
|
||||||
|
coords(ielem,2,4),
|
||||||
|
coords(ielem,2,5),
|
||||||
|
coords(ielem,2,6),
|
||||||
|
coords(ielem,2,7)
|
||||||
|
};
|
||||||
|
|
||||||
|
grad( z , y , g );
|
||||||
|
|
||||||
|
grad_op(ielem,0,0) = g[0];
|
||||||
|
grad_op(ielem,0,1) = g[1];
|
||||||
|
grad_op(ielem,0,2) = g[2];
|
||||||
|
grad_op(ielem,0,3) = g[3];
|
||||||
|
grad_op(ielem,0,4) = g[4];
|
||||||
|
grad_op(ielem,0,5) = g[5];
|
||||||
|
grad_op(ielem,0,6) = g[6];
|
||||||
|
grad_op(ielem,0,7) = g[7];
|
||||||
|
|
||||||
|
grad( x , z , g );
|
||||||
|
|
||||||
|
grad_op(ielem,1,0) = g[0];
|
||||||
|
grad_op(ielem,1,1) = g[1];
|
||||||
|
grad_op(ielem,1,2) = g[2];
|
||||||
|
grad_op(ielem,1,3) = g[3];
|
||||||
|
grad_op(ielem,1,4) = g[4];
|
||||||
|
grad_op(ielem,1,5) = g[5];
|
||||||
|
grad_op(ielem,1,6) = g[6];
|
||||||
|
grad_op(ielem,1,7) = g[7];
|
||||||
|
|
||||||
|
grad( y , x , g );
|
||||||
|
|
||||||
|
grad_op(ielem,2,0) = g[0];
|
||||||
|
grad_op(ielem,2,1) = g[1];
|
||||||
|
grad_op(ielem,2,2) = g[2];
|
||||||
|
grad_op(ielem,2,3) = g[3];
|
||||||
|
grad_op(ielem,2,4) = g[4];
|
||||||
|
grad_op(ielem,2,5) = g[5];
|
||||||
|
grad_op(ielem,2,6) = g[6];
|
||||||
|
grad_op(ielem,2,7) = g[7];
|
||||||
|
}
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
struct Init {
|
||||||
|
typedef typename self_type::execution_space execution_space ;
|
||||||
|
|
||||||
|
elem_coord_type coords ;
|
||||||
|
|
||||||
|
Init( const elem_coord_type & arg_coords )
|
||||||
|
: coords( arg_coords ) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()( size_type ielem ) const
|
||||||
|
{
|
||||||
|
coords(ielem,0,0) = 0.;
|
||||||
|
coords(ielem,1,0) = 0.;
|
||||||
|
coords(ielem,2,0) = 0.;
|
||||||
|
|
||||||
|
coords(ielem,0,1) = 1.;
|
||||||
|
coords(ielem,1,1) = 0.;
|
||||||
|
coords(ielem,2,1) = 0.;
|
||||||
|
|
||||||
|
coords(ielem,0,2) = 1.;
|
||||||
|
coords(ielem,1,2) = 1.;
|
||||||
|
coords(ielem,2,2) = 0.;
|
||||||
|
|
||||||
|
coords(ielem,0,3) = 0.;
|
||||||
|
coords(ielem,1,3) = 1.;
|
||||||
|
coords(ielem,2,3) = 0.;
|
||||||
|
|
||||||
|
|
||||||
|
coords(ielem,0,4) = 0.;
|
||||||
|
coords(ielem,1,4) = 0.;
|
||||||
|
coords(ielem,2,4) = 1.;
|
||||||
|
|
||||||
|
coords(ielem,0,5) = 1.;
|
||||||
|
coords(ielem,1,5) = 0.;
|
||||||
|
coords(ielem,2,5) = 1.;
|
||||||
|
|
||||||
|
coords(ielem,0,6) = 1.;
|
||||||
|
coords(ielem,1,6) = 1.;
|
||||||
|
coords(ielem,2,6) = 1.;
|
||||||
|
|
||||||
|
coords(ielem,0,7) = 0.;
|
||||||
|
coords(ielem,1,7) = 1.;
|
||||||
|
coords(ielem,2,7) = 1.;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
static double test( const int count , const int iter = 1 )
|
||||||
|
{
|
||||||
|
elem_coord_type coord( "coord" , count );
|
||||||
|
elem_grad_type grad ( "grad" , count );
|
||||||
|
|
||||||
|
// Execute the parallel kernels on the arrays:
|
||||||
|
|
||||||
|
double dt_min = 0 ;
|
||||||
|
|
||||||
|
Kokkos::parallel_for( count , Init( coord ) );
|
||||||
|
execution_space::fence();
|
||||||
|
|
||||||
|
for ( int i = 0 ; i < iter ; ++i ) {
|
||||||
|
Kokkos::Impl::Timer timer ;
|
||||||
|
Kokkos::parallel_for( count , HexGrad<execution_space>( coord , grad ) );
|
||||||
|
execution_space::fence();
|
||||||
|
const double dt = timer.seconds();
|
||||||
|
if ( 0 == i ) dt_min = dt ;
|
||||||
|
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dt_min ;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
104
lib/kokkos/core/perf_test/PerfTestHost.cpp
Executable file
104
lib/kokkos/core/perf_test/PerfTestHost.cpp
Executable file
@ -0,0 +1,104 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
// Pick the host execution space under test and a printable name for it.
// The first enabled backend wins, checked in the order OpenMP, pthread,
// Serial; building with none of them enabled is a hard error.
#if defined(KOKKOS_HAVE_OPENMP)

typedef Kokkos::OpenMP TestHostDevice;
const char TestHostDeviceName[] = "Kokkos::OpenMP";

#elif defined(KOKKOS_HAVE_PTHREAD)

typedef Kokkos::Threads TestHostDevice;
const char TestHostDeviceName[] = "Kokkos::Threads";

#elif defined(KOKKOS_HAVE_SERIAL)

typedef Kokkos::Serial TestHostDevice;
const char TestHostDeviceName[] = "Kokkos::Serial";

#else
#  error "You must enable at least one of the following execution spaces in order to build this test: Kokkos::Threads, Kokkos::OpenMP, or Kokkos::Serial."
#endif
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
#include <PerfTestHexGrad.hpp>
|
||||||
|
#include <PerfTestBlasKernels.hpp>
|
||||||
|
#include <PerfTestGramSchmidt.hpp>
|
||||||
|
#include <PerfTestDriver.hpp>
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Test {
|
||||||
|
|
||||||
|
class host : public ::testing::Test {
|
||||||
|
protected:
|
||||||
|
static void SetUpTestCase()
|
||||||
|
{
|
||||||
|
const unsigned team_count = Kokkos::hwloc::get_available_numa_count();
|
||||||
|
const unsigned threads_per_team = 4 ;
|
||||||
|
|
||||||
|
TestHostDevice::initialize( team_count * threads_per_team );
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestCase()
|
||||||
|
{
|
||||||
|
TestHostDevice::finalize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Run the hexgrad performance driver on the host device with the range
// arguments ( 10 , 20 ); the run must not throw.
TEST_F( host, hexgrad )
{
  const int range_beg = 10 ;
  const int range_end = 20 ;

  EXPECT_NO_THROW( run_test_hexgrad< TestHostDevice >( range_beg , range_end , TestHostDeviceName ) );
}
|
||||||
|
|
||||||
|
// Run the Gram-Schmidt performance driver on the host device with the range
// arguments ( 10 , 20 ); the run must not throw.
TEST_F( host, gramschmidt )
{
  const int range_beg = 10 ;
  const int range_end = 20 ;

  EXPECT_NO_THROW( run_test_gramschmidt< TestHostDevice >( range_beg , range_end , TestHostDeviceName ) );
}
|
||||||
|
|
||||||
|
} // namespace Test
|
||||||
|
|
||||||
|
|
||||||
49
lib/kokkos/core/perf_test/PerfTestMain.cpp
Executable file
49
lib/kokkos/core/perf_test/PerfTestMain.cpp
Executable file
@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
::testing::InitGoogleTest(&argc,argv);
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
||||||
504
lib/kokkos/core/perf_test/test_atomic.cpp
Executable file
504
lib/kokkos/core/perf_test/test_atomic.cpp
Executable file
@ -0,0 +1,504 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <impl/Kokkos_Timer.hpp>
|
||||||
|
|
||||||
|
typedef Kokkos::DefaultExecutionSpace exec_space;
|
||||||
|
|
||||||
|
// Text attribute codes, passed as the `attr` argument of textcolor().
#define RESET     0
#define BRIGHT    1
#define DIM       2
#define UNDERLINE 3
#define BLINK     4
#define REVERSE   7
#define HIDDEN    8

// Colour indices, passed as the `fg` / `bg` arguments of textcolor(), which
// offsets them by 30 (foreground) and 40 (background).
#define BLACK     0
#define RED       1
#define GREEN     2
#define YELLOW    3
#define BLUE      4
#define MAGENTA   5
#define CYAN      6
#define GREY      7
#define WHITE     8
|
||||||
|
|
||||||
|
// Write the terminal control sequence ESC [ attr ; fg+30 ; bg+40 m to stdout.
//   attr : text attribute code
//   fg   : foreground colour index (30 is added)
//   bg   : background colour index (40 is added)
void textcolor(int attr, int fg, int bg)
{
  // Print the sequence directly. Formatting it into a fixed 13-byte buffer
  // with sprintf first could overflow the buffer for multi-digit arguments.
  printf("%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
}
|
||||||
|
// Switch the terminal to the standard setting used by this program:
// RESET attribute, BLACK foreground, WHITE background.
void textcolor_standard()
{
  textcolor(RESET, BLACK, WHITE);
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class T,class DEVICE_TYPE>
|
||||||
|
struct ZeroFunctor{
|
||||||
|
typedef DEVICE_TYPE execution_space;
|
||||||
|
typedef typename Kokkos::View<T,execution_space> type;
|
||||||
|
typedef typename Kokkos::View<T,execution_space>::HostMirror h_type;
|
||||||
|
type data;
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const {
|
||||||
|
data() = 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//---------------------------------------------------
|
||||||
|
//--------------atomic_fetch_add---------------------
|
||||||
|
//---------------------------------------------------
|
||||||
|
|
||||||
|
template<class T,class DEVICE_TYPE>
|
||||||
|
struct AddFunctor{
|
||||||
|
typedef DEVICE_TYPE execution_space;
|
||||||
|
typedef Kokkos::View<T,execution_space> type;
|
||||||
|
type data;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const {
|
||||||
|
Kokkos::atomic_fetch_add(&data(),(T)1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T AddLoop(int loop) {
|
||||||
|
struct ZeroFunctor<T,exec_space> f_zero;
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||||
|
f_zero.data = data;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
struct AddFunctor<T,exec_space> f_add;
|
||||||
|
f_add.data = data;
|
||||||
|
Kokkos::parallel_for(loop,f_add);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(h_data,data);
|
||||||
|
T val = h_data();
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T,class DEVICE_TYPE>
|
||||||
|
struct AddNonAtomicFunctor{
|
||||||
|
typedef DEVICE_TYPE execution_space;
|
||||||
|
typedef Kokkos::View<T,execution_space> type;
|
||||||
|
type data;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const {
|
||||||
|
data()+=(T)1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T AddLoopNonAtomic(int loop) {
|
||||||
|
struct ZeroFunctor<T,exec_space> f_zero;
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||||
|
|
||||||
|
f_zero.data = data;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
struct AddNonAtomicFunctor<T,exec_space> f_add;
|
||||||
|
f_add.data = data;
|
||||||
|
Kokkos::parallel_for(loop,f_add);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(h_data,data);
|
||||||
|
T val = h_data();
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serial reference for the add test: increments a heap-allocated value
// `loop` times in a plain for-loop and returns the result.
//
// loop  number of increments to perform (no increments when loop <= 0)
// returns the final value, i.e. `loop` additions of (T)1 starting from 0
template<class T>
T AddLoopSerial(int loop) {
  T* data = new T[1];
  data[0] = 0;

  for(int i=0;i<loop;i++)
    *data+=(T)1;

  T val = *data;
  // Storage came from new[], so it must be released with delete[];
  // the scalar form is undefined behaviour.
  delete [] data;
  return val;
}
|
||||||
|
|
||||||
|
template<class T,class DEVICE_TYPE>
|
||||||
|
struct CASFunctor{
|
||||||
|
typedef DEVICE_TYPE execution_space;
|
||||||
|
typedef Kokkos::View<T,execution_space> type;
|
||||||
|
type data;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const {
|
||||||
|
T old = data();
|
||||||
|
T newval, assumed;
|
||||||
|
do {
|
||||||
|
assumed = old;
|
||||||
|
newval = assumed + (T)1;
|
||||||
|
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
|
||||||
|
}
|
||||||
|
while( old != assumed );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T CASLoop(int loop) {
|
||||||
|
struct ZeroFunctor<T,exec_space> f_zero;
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||||
|
f_zero.data = data;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
struct CASFunctor<T,exec_space> f_cas;
|
||||||
|
f_cas.data = data;
|
||||||
|
Kokkos::parallel_for(loop,f_cas);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(h_data,data);
|
||||||
|
T val = h_data();
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T,class DEVICE_TYPE>
|
||||||
|
struct CASNonAtomicFunctor{
|
||||||
|
typedef DEVICE_TYPE execution_space;
|
||||||
|
typedef Kokkos::View<T,execution_space> type;
|
||||||
|
type data;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const {
|
||||||
|
volatile T assumed;
|
||||||
|
volatile T newval;
|
||||||
|
bool fail=1;
|
||||||
|
do {
|
||||||
|
assumed = data();
|
||||||
|
newval = assumed + (T)1;
|
||||||
|
if(data()==assumed) {
|
||||||
|
data() = newval;
|
||||||
|
fail = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while(fail);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T CASLoopNonAtomic(int loop) {
|
||||||
|
struct ZeroFunctor<T,exec_space> f_zero;
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||||
|
f_zero.data = data;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
struct CASNonAtomicFunctor<T,exec_space> f_cas;
|
||||||
|
f_cas.data = data;
|
||||||
|
Kokkos::parallel_for(loop,f_cas);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(h_data,data);
|
||||||
|
T val = h_data();
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serial reference for the compare-and-swap test: performs `loop`
// read / compute / store increments on a heap-allocated value.
//
// loop  number of increments to perform (no increments when loop <= 0)
// returns the final value, i.e. `loop` additions of (T)1 starting from 0
template<class T>
T CASLoopSerial(int loop) {
  T* data = new T[1];
  data[0] = 0;

  for(int i=0;i<loop;i++) {
    T assumed;
    T newval;
    T old;
    // Same shape as the parallel CAS loop. Nothing else modifies *data
    // between the two reads, so the body runs once per outer iteration.
    do {
      assumed = *data;
      newval = assumed + (T)1;
      old = *data;
      *data = newval;
    }
    while(!(assumed==old));
  }

  T val = *data;
  // Storage came from new[], so it must be released with delete[];
  // the scalar form is undefined behaviour.
  delete [] data;
  return val;
}
|
||||||
|
|
||||||
|
template<class T,class DEVICE_TYPE>
|
||||||
|
struct ExchFunctor{
|
||||||
|
typedef DEVICE_TYPE execution_space;
|
||||||
|
typedef Kokkos::View<T,execution_space> type;
|
||||||
|
type data, data2;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const {
|
||||||
|
T old = Kokkos::atomic_exchange(&data(),(T)i);
|
||||||
|
Kokkos::atomic_fetch_add(&data2(),old);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T ExchLoop(int loop) {
|
||||||
|
struct ZeroFunctor<T,exec_space> f_zero;
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||||
|
f_zero.data = data;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data2("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
|
||||||
|
f_zero.data = data2;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
struct ExchFunctor<T,exec_space> f_exch;
|
||||||
|
f_exch.data = data;
|
||||||
|
f_exch.data2 = data2;
|
||||||
|
Kokkos::parallel_for(loop,f_exch);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(h_data,data);
|
||||||
|
Kokkos::deep_copy(h_data2,data2);
|
||||||
|
T val = h_data() + h_data2();
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T,class DEVICE_TYPE>
|
||||||
|
struct ExchNonAtomicFunctor{
|
||||||
|
typedef DEVICE_TYPE execution_space;
|
||||||
|
typedef Kokkos::View<T,execution_space> type;
|
||||||
|
type data, data2;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator()(int i) const {
|
||||||
|
T old = data();
|
||||||
|
data()=(T) i;
|
||||||
|
data2()+=old;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T ExchLoopNonAtomic(int loop) {
|
||||||
|
struct ZeroFunctor<T,exec_space> f_zero;
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data("HData");
|
||||||
|
f_zero.data = data;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
typename ZeroFunctor<T,exec_space>::type data2("Data");
|
||||||
|
typename ZeroFunctor<T,exec_space>::h_type h_data2("HData");
|
||||||
|
f_zero.data = data2;
|
||||||
|
Kokkos::parallel_for(1,f_zero);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
struct ExchNonAtomicFunctor<T,exec_space> f_exch;
|
||||||
|
f_exch.data = data;
|
||||||
|
f_exch.data2 = data2;
|
||||||
|
Kokkos::parallel_for(loop,f_exch);
|
||||||
|
exec_space::fence();
|
||||||
|
|
||||||
|
Kokkos::deep_copy(h_data,data);
|
||||||
|
Kokkos::deep_copy(h_data2,data2);
|
||||||
|
T val = h_data() + h_data2();
|
||||||
|
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serial reference for the exchange test: for i in [0,loop) stores i into
// one heap value and adds the value it replaced to a second heap value.
//
// loop  number of exchanges to perform (none when loop <= 0)
// returns the sum of the two values after the loop
template<class T>
T ExchLoopSerial(int loop) {
  T* data = new T[1];
  T* data2 = new T[1];
  data[0] = 0;
  data2[0] = 0;
  for(int i=0;i<loop;i++) {
    T old = *data;
    *data=(T) i;
    *data2+=old;
  }

  T val = *data2 + *data;
  // Both buffers came from new[], so they must be released with delete[];
  // the scalar form is undefined behaviour.
  delete [] data;
  delete [] data2;
  return val;
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T LoopVariant(int loop, int test) {
|
||||||
|
switch (test) {
|
||||||
|
case 1: return AddLoop<T>(loop);
|
||||||
|
case 2: return CASLoop<T>(loop);
|
||||||
|
case 3: return ExchLoop<T>(loop);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T LoopVariantSerial(int loop, int test) {
|
||||||
|
switch (test) {
|
||||||
|
case 1: return AddLoopSerial<T>(loop);
|
||||||
|
case 2: return CASLoopSerial<T>(loop);
|
||||||
|
case 3: return ExchLoopSerial<T>(loop);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
T LoopVariantNonAtomic(int loop, int test) {
|
||||||
|
switch (test) {
|
||||||
|
case 1: return AddLoopNonAtomic<T>(loop);
|
||||||
|
case 2: return CASLoopNonAtomic<T>(loop);
|
||||||
|
case 3: return ExchLoopNonAtomic<T>(loop);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
void Loop(int loop, int test, const char* type_name) {
|
||||||
|
LoopVariant<T>(loop,test);
|
||||||
|
|
||||||
|
Kokkos::Impl::Timer timer;
|
||||||
|
T res = LoopVariant<T>(loop,test);
|
||||||
|
double time1 = timer.seconds();
|
||||||
|
|
||||||
|
timer.reset();
|
||||||
|
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
|
||||||
|
double time2 = timer.seconds();
|
||||||
|
|
||||||
|
timer.reset();
|
||||||
|
T resSerial = LoopVariantSerial<T>(loop,test);
|
||||||
|
double time3 = timer.seconds();
|
||||||
|
|
||||||
|
time1*=1e6/loop;
|
||||||
|
time2*=1e6/loop;
|
||||||
|
time3*=1e6/loop;
|
||||||
|
//textcolor_standard();
|
||||||
|
bool passed = true;
|
||||||
|
if(resSerial!=res) passed = false;
|
||||||
|
//if(!passed) textcolor(RESET,BLACK,YELLOW);
|
||||||
|
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
|
||||||
|
//if(!passed) textcolor_standard();
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
void Test(int loop, int test, const char* type_name) {
|
||||||
|
if(test==-1) {
|
||||||
|
Loop<T>(loop,1,type_name);
|
||||||
|
Loop<T>(loop,2,type_name);
|
||||||
|
Loop<T>(loop,3,type_name);
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
Loop<T>(loop,test,type_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
int type = -1;
|
||||||
|
int loop = 1000000;
|
||||||
|
int test = -1;
|
||||||
|
|
||||||
|
for(int i=0;i<argc;i++)
|
||||||
|
{
|
||||||
|
if((strcmp(argv[i],"--test")==0)) {test=atoi(argv[++i]); continue;}
|
||||||
|
if((strcmp(argv[i],"--type")==0)) {type=atoi(argv[++i]); continue;}
|
||||||
|
if((strcmp(argv[i],"-l")==0)||(strcmp(argv[i],"--loop")==0)) {loop=atoi(argv[++i]); continue;}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Kokkos::initialize(argc,argv);
|
||||||
|
|
||||||
|
|
||||||
|
printf("Using %s\n",Kokkos::atomic_query_version());
|
||||||
|
bool all_tests = false;
|
||||||
|
if(type==-1) all_tests = true;
|
||||||
|
while(type<100) {
|
||||||
|
if(type==1) {
|
||||||
|
Test<int>(loop,test,"int ");
|
||||||
|
}
|
||||||
|
if(type==2) {
|
||||||
|
Test<long int>(loop,test,"long int ");
|
||||||
|
}
|
||||||
|
if(type==3) {
|
||||||
|
Test<long long int>(loop,test,"long long int ");
|
||||||
|
}
|
||||||
|
if(type==4) {
|
||||||
|
Test<unsigned int>(loop,test,"unsigned int ");
|
||||||
|
}
|
||||||
|
if(type==5) {
|
||||||
|
Test<unsigned long int>(loop,test,"unsigned long int ");
|
||||||
|
}
|
||||||
|
if(type==6) {
|
||||||
|
Test<unsigned long long int>(loop,test,"unsigned long long int ");
|
||||||
|
}
|
||||||
|
if(type==10) {
|
||||||
|
//Test<float>(loop,test,"float ");
|
||||||
|
}
|
||||||
|
if(type==11) {
|
||||||
|
Test<double>(loop,test,"double ");
|
||||||
|
}
|
||||||
|
if(!all_tests) type=100;
|
||||||
|
else type++;
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::finalize();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
283
lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp
Executable file
283
lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp
Executable file
@ -0,0 +1,283 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
|
||||||
|
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
|
||||||
|
// Via reinterpret_cast this can be used to support all scalar types of those sizes.
|
||||||
|
// Any other scalar type falls back to either normal reads out of global memory,
|
||||||
|
// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
|
||||||
|
|
||||||
|
template< typename ValueType , typename AliasType >
|
||||||
|
struct CudaTextureFetch {
|
||||||
|
|
||||||
|
::cudaTextureObject_t m_obj ;
|
||||||
|
const ValueType * m_ptr ;
|
||||||
|
int m_offset ;
|
||||||
|
|
||||||
|
// Deference operator pulls through texture object and returns by value
|
||||||
|
template< typename iType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
ValueType operator[]( const iType & i ) const
|
||||||
|
{
|
||||||
|
#if defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
|
||||||
|
AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
|
||||||
|
return *(reinterpret_cast<ValueType*> (&v));
|
||||||
|
#else
|
||||||
|
return m_ptr[ i ];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pointer to referenced memory
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
operator const ValueType * () const { return m_ptr ; }
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch() : m_obj() , m_ptr() , m_offset() {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
~CudaTextureFetch() {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch( const CudaTextureFetch & rhs )
|
||||||
|
: m_obj( rhs.m_obj )
|
||||||
|
, m_ptr( rhs.m_ptr )
|
||||||
|
, m_offset( rhs.m_offset )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch( CudaTextureFetch && rhs )
|
||||||
|
: m_obj( rhs.m_obj )
|
||||||
|
, m_ptr( rhs.m_ptr )
|
||||||
|
, m_offset( rhs.m_offset )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
|
||||||
|
{
|
||||||
|
m_obj = rhs.m_obj ;
|
||||||
|
m_ptr = rhs.m_ptr ;
|
||||||
|
m_offset = rhs.m_offset ;
|
||||||
|
return *this ;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch & operator = ( CudaTextureFetch && rhs )
|
||||||
|
{
|
||||||
|
m_obj = rhs.m_obj ;
|
||||||
|
m_ptr = rhs.m_ptr ;
|
||||||
|
m_offset = rhs.m_offset ;
|
||||||
|
return *this ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Texture object spans the entire allocation.
|
||||||
|
// This handle may view a subset of the allocation, so an offset is required.
|
||||||
|
template< class CudaMemorySpace >
|
||||||
|
inline explicit
|
||||||
|
CudaTextureFetch( const ValueType * const arg_ptr
|
||||||
|
, Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > & record
|
||||||
|
)
|
||||||
|
// 'attach_texture_object' returns 0 when __CUDA_ARCH__ < 300
|
||||||
|
: m_obj( record.template attach_texture_object< AliasType >() )
|
||||||
|
, m_ptr( arg_ptr )
|
||||||
|
, m_offset( record.attach_texture_object_offset( reinterpret_cast<const AliasType*>( arg_ptr ) ) )
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||||
|
|
||||||
|
template< typename ValueType , typename AliasType >
|
||||||
|
struct CudaLDGFetch {
|
||||||
|
|
||||||
|
const ValueType * m_ptr ;
|
||||||
|
|
||||||
|
template< typename iType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
ValueType operator[]( const iType & i ) const
|
||||||
|
{
|
||||||
|
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_ptr[i]));
|
||||||
|
return *(reinterpret_cast<ValueType*> (&v));
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
operator const ValueType * () const { return m_ptr ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaLDGFetch() : m_ptr() {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
~CudaLDGFetch() {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaLDGFetch( const CudaLDGFetch & rhs )
|
||||||
|
: m_ptr( rhs.m_ptr )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaLDGFetch( CudaLDGFetch && rhs )
|
||||||
|
: m_ptr( rhs.m_ptr )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaLDGFetch & operator = ( const CudaLDGFetch & rhs )
|
||||||
|
{
|
||||||
|
m_ptr = rhs.m_ptr ;
|
||||||
|
return *this ;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaLDGFetch & operator = ( CudaLDGFetch && rhs )
|
||||||
|
{
|
||||||
|
m_ptr = rhs.m_ptr ;
|
||||||
|
return *this ;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class CudaMemorySpace >
|
||||||
|
inline explicit
|
||||||
|
CudaTextureFetch( const ValueType * const arg_ptr
|
||||||
|
, Kokkos::Experimental::Impl::SharedAllocationRecord< CudaMemorySpace , void > const &
|
||||||
|
)
|
||||||
|
: m_ptr( arg_data_ptr )
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Experimental
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
|
||||||
|
* if 'const' value type, CudaSpace and random access.
|
||||||
|
*/
|
||||||
|
template< class Traits >
|
||||||
|
class ViewDataHandle< Traits ,
|
||||||
|
typename std::enable_if<(
|
||||||
|
// Is Cuda memory space
|
||||||
|
( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value ||
|
||||||
|
std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )
|
||||||
|
&&
|
||||||
|
// Is a trivial const value of 4, 8, or 16 bytes
|
||||||
|
std::is_trivial<typename Traits::const_value_type>::value
|
||||||
|
&&
|
||||||
|
std::is_same<typename Traits::const_value_type,typename Traits::value_type>::value
|
||||||
|
&&
|
||||||
|
( sizeof(typename Traits::const_value_type) == 4 ||
|
||||||
|
sizeof(typename Traits::const_value_type) == 8 ||
|
||||||
|
sizeof(typename Traits::const_value_type) == 16 )
|
||||||
|
&&
|
||||||
|
// Random access trait
|
||||||
|
( Traits::memory_traits::RandomAccess != 0 )
|
||||||
|
)>::type >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
using track_type = Kokkos::Experimental::Impl::SharedAllocationTracker ;
|
||||||
|
|
||||||
|
using value_type = typename Traits::const_value_type ;
|
||||||
|
using return_type = typename Traits::const_value_type ; // NOT a reference
|
||||||
|
|
||||||
|
using alias_type = typename std::conditional< ( sizeof(value_type) == 4 ) , int ,
|
||||||
|
typename std::conditional< ( sizeof(value_type) == 8 ) , ::int2 ,
|
||||||
|
typename std::conditional< ( sizeof(value_type) == 16 ) , ::int4 , void
|
||||||
|
>::type
|
||||||
|
>::type
|
||||||
|
>::type ;
|
||||||
|
|
||||||
|
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||||
|
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
|
||||||
|
#else
|
||||||
|
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static handle_type const & assign( handle_type const & arg_handle , track_type const & /* arg_tracker */ )
|
||||||
|
{
|
||||||
|
return arg_handle ;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static handle_type assign( value_type * arg_data_ptr, track_type const & arg_tracker )
|
||||||
|
{
|
||||||
|
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
// Assignment of texture = non-texture requires creation of a texture object
|
||||||
|
// which can only occur on the host. In addition, 'get_record' is only valid
|
||||||
|
// if called in a host execution space
|
||||||
|
return handle_type( arg_data_ptr , arg_tracker.template get_record< typename Traits::memory_space >() );
|
||||||
|
#else
|
||||||
|
Kokkos::Impl::cuda_abort("Cannot create Cuda texture object from within a Cuda kernel");
|
||||||
|
return handle_type();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||||
|
#endif /* #ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP */
|
||||||
|
|
||||||
277
lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
Executable file
277
lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
Executable file
@ -0,0 +1,277 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDAEXEC_HPP
|
||||||
|
#define KOKKOS_CUDAEXEC_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
struct CudaTraits {
|
||||||
|
enum { WarpSize = 32 /* 0x0020 */ };
|
||||||
|
enum { WarpIndexMask = 0x001f /* Mask for warpindex */ };
|
||||||
|
enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ };
|
||||||
|
|
||||||
|
enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ };
|
||||||
|
enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ };
|
||||||
|
enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ };
|
||||||
|
|
||||||
|
enum { UpperBoundGridCount = 65535 /* Hard upper bound */ };
|
||||||
|
enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ };
|
||||||
|
enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ };
|
||||||
|
enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ };
|
||||||
|
|
||||||
|
typedef unsigned long
|
||||||
|
ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ];
|
||||||
|
|
||||||
|
enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ };
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static
|
||||||
|
CudaSpace::size_type warp_count( CudaSpace::size_type i )
|
||||||
|
{ return ( i + WarpIndexMask ) >> WarpIndexShift ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static
|
||||||
|
CudaSpace::size_type warp_align( CudaSpace::size_type i )
|
||||||
|
{
|
||||||
|
enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) };
|
||||||
|
return ( i + WarpIndexMask ) & Mask ;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
CudaSpace::size_type cuda_internal_maximum_warp_count();
|
||||||
|
CudaSpace::size_type cuda_internal_maximum_grid_count();
|
||||||
|
CudaSpace::size_type cuda_internal_maximum_shared_words();
|
||||||
|
|
||||||
|
CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size );
|
||||||
|
CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size );
|
||||||
|
CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size );
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined( __CUDACC__ )
|
||||||
|
|
||||||
|
/** \brief Access to constant memory on the device */
|
||||||
|
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||||
|
extern
|
||||||
|
#endif
|
||||||
|
__device__ __constant__
|
||||||
|
Kokkos::Impl::CudaTraits::ConstantGlobalBufferType
|
||||||
|
kokkos_impl_cuda_constant_memory_buffer ;
|
||||||
|
|
||||||
|
__device__ __constant__
|
||||||
|
int* kokkos_impl_cuda_atomic_lock_array ;
|
||||||
|
#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
|
||||||
|
#define CUDA_SPACE_ATOMIC_XOR_MASK 0x15A39
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
// Tries to take the lock-array entry associated with address `ptr`.
// Returns true when the entry was 0 and has been set to 1 by this call.
__device__ inline
bool lock_address_cuda_space(void* ptr) {
  // Entry index: the address shifted right by two bits, restricted to the
  // bits selected by CUDA_SPACE_ATOMIC_MASK.
  const size_t slot = ( size_t(ptr) >> 2 ) & CUDA_SPACE_ATOMIC_MASK ;
  //slot = slot xor CUDA_SPACE_ATOMIC_XOR_MASK;
  const int previous = atomicCAS( &kokkos_impl_cuda_atomic_lock_array[slot] , 0 , 1 );
  return 0 == previous ;
}
|
||||||
|
|
||||||
|
// Releases the lock-array entry associated with address `ptr` by
// atomically writing 0 to it.
__device__ inline
void unlock_address_cuda_space(void* ptr) {
  // Same entry index computation as lock_address_cuda_space.
  const size_t slot = ( size_t(ptr) >> 2 ) & CUDA_SPACE_ATOMIC_MASK ;
  //slot = slot xor CUDA_SPACE_ATOMIC_XOR_MASK;
  atomicExch( &kokkos_impl_cuda_atomic_lock_array[ slot ] , 0 );
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename T >
|
||||||
|
inline
|
||||||
|
__device__
|
||||||
|
T * kokkos_impl_cuda_shared_memory()
|
||||||
|
{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; }
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// See section B.17 of Cuda C Programming Guide Version 3.2
|
||||||
|
// for discussion of
|
||||||
|
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
|
||||||
|
// function qualifier which could be used to improve performance.
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Maximize L1 cache and minimize shared memory:
|
||||||
|
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 );
|
||||||
|
// For 2.0 capability: 48 KB L1 and 16 KB shared
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class DriverType >
|
||||||
|
__global__
|
||||||
|
static void cuda_parallel_launch_constant_memory()
|
||||||
|
{
|
||||||
|
const DriverType & driver =
|
||||||
|
*((const DriverType *) kokkos_impl_cuda_constant_memory_buffer );
|
||||||
|
|
||||||
|
driver();
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class DriverType >
|
||||||
|
__global__
|
||||||
|
static void cuda_parallel_launch_local_memory( const DriverType driver )
|
||||||
|
{
|
||||||
|
driver();
|
||||||
|
}
|
||||||
|
|
||||||
|
template < class DriverType ,
|
||||||
|
bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) >
|
||||||
|
struct CudaParallelLaunch ;
|
||||||
|
|
||||||
|
template < class DriverType >
|
||||||
|
struct CudaParallelLaunch< DriverType , true > {
|
||||||
|
|
||||||
|
inline
|
||||||
|
CudaParallelLaunch( const DriverType & driver
|
||||||
|
, const dim3 & grid
|
||||||
|
, const dim3 & block
|
||||||
|
, const int shmem
|
||||||
|
, const cudaStream_t stream = 0 )
|
||||||
|
{
|
||||||
|
if ( grid.x && ( block.x * block.y * block.z ) ) {
|
||||||
|
|
||||||
|
if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) <
|
||||||
|
sizeof( DriverType ) ) {
|
||||||
|
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( CudaTraits::SharedMemoryCapacity < shmem ) {
|
||||||
|
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
|
||||||
|
}
|
||||||
|
else if ( shmem ) {
|
||||||
|
cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared );
|
||||||
|
} else {
|
||||||
|
cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy functor to constant memory on the device
|
||||||
|
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
|
||||||
|
|
||||||
|
int* lock_array_ptr = lock_array_cuda_space_ptr();
|
||||||
|
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
|
||||||
|
|
||||||
|
// Invoke the driver function on the device
|
||||||
|
cuda_parallel_launch_constant_memory< DriverType ><<< grid , block , shmem , stream >>>();
|
||||||
|
|
||||||
|
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||||
|
Kokkos::Cuda::fence();
|
||||||
|
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template < class DriverType >
|
||||||
|
struct CudaParallelLaunch< DriverType , false > {
|
||||||
|
|
||||||
|
inline
|
||||||
|
CudaParallelLaunch( const DriverType & driver
|
||||||
|
, const dim3 & grid
|
||||||
|
, const dim3 & block
|
||||||
|
, const int shmem
|
||||||
|
, const cudaStream_t stream = 0 )
|
||||||
|
{
|
||||||
|
if ( grid.x && ( block.x * block.y * block.z ) ) {
|
||||||
|
|
||||||
|
if ( CudaTraits::SharedMemoryCapacity < shmem ) {
|
||||||
|
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") );
|
||||||
|
}
|
||||||
|
else if ( shmem ) {
|
||||||
|
cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferShared );
|
||||||
|
} else {
|
||||||
|
cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferL1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
int* lock_array_ptr = lock_array_cuda_space_ptr();
|
||||||
|
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
|
||||||
|
|
||||||
|
cuda_parallel_launch_local_memory< DriverType ><<< grid , block , shmem , stream >>>( driver );
|
||||||
|
|
||||||
|
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||||
|
Kokkos::Cuda::fence();
|
||||||
|
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* defined( __CUDACC__ ) */
|
||||||
|
#endif /* defined( KOKKOS_HAVE_CUDA ) */
|
||||||
|
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */
|
||||||
670
lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
Executable file
670
lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
Executable file
@ -0,0 +1,670 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <Kokkos_Cuda.hpp>
|
||||||
|
#include <Kokkos_CudaSpace.hpp>
|
||||||
|
|
||||||
|
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_Internal.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
// Copy n bytes from src to dst with cudaMemcpy; a CUDA error is reported
// through CUDA_SAFE_CALL.
DeepCopy<CudaSpace,CudaSpace>::DeepCopy( void * dst , const void * src , size_t n )
{
  CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) );
}
|
||||||
|
|
||||||
|
// Enqueue a copy of n bytes from src to dst on the stream of `instance`
// with cudaMemcpyAsync.
DeepCopy<CudaSpace,CudaSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{
  CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) );
}
|
||||||
|
|
||||||
|
// Copy n bytes from src to dst with cudaMemcpy; a CUDA error is reported
// through CUDA_SAFE_CALL.
DeepCopy<HostSpace,CudaSpace>::DeepCopy( void * dst , const void * src , size_t n )
{
  CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) );
}
|
||||||
|
|
||||||
|
// Enqueue a copy of n bytes from src to dst on the stream of `instance`
// with cudaMemcpyAsync.
DeepCopy<HostSpace,CudaSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{
  CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) );
}
|
||||||
|
|
||||||
|
// Copy n bytes from src to dst with cudaMemcpy; a CUDA error is reported
// through CUDA_SAFE_CALL.
DeepCopy<CudaSpace,HostSpace>::DeepCopy( void * dst , const void * src , size_t n )
{
  CUDA_SAFE_CALL( cudaMemcpy( dst , src , n , cudaMemcpyDefault ) );
}
|
||||||
|
|
||||||
|
// Enqueue a copy of n bytes from src to dst on the stream of `instance`
// with cudaMemcpyAsync.
DeepCopy<CudaSpace,HostSpace>::DeepCopy( const Cuda & instance , void * dst , const void * src , size_t n )
{
  CUDA_SAFE_CALL( cudaMemcpyAsync( dst , src , n , cudaMemcpyDefault , instance.cuda_stream() ) );
}
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Attach a texture attribute to a tracked allocation.
//
//   tracker   : allocation to receive the texture attribute
//   type_size : size in bytes of one element; used to turn the allocation
//               size into an element count
//   desc      : channel format forwarded to the TextureAttribute
//
// When the tracker has no attribute yet, one is created provided that the
// allocator supports texture binding and the element count is below
// TEXTURE_BOUND_1D; otherwise a runtime exception describing the failure is
// raised.  Afterwards the attached attribute must be a TextureAttribute,
// else a runtime exception is raised.
void texture_object_attach_impl( Impl::AllocationTracker const & tracker
                               , unsigned type_size
                               , ::cudaChannelFormatDesc const & desc
                               )
{
  // NOTE(review): 2u << 27 equals 2^28 elements; confirm this is the intended
  // 1D linear-texture limit for the targeted devices (1u << 27 may have been
  // meant).
  enum { TEXTURE_BOUND_1D = 2u << 27 };

  if ( tracker.attribute() == NULL ) {
    // check for correct allocator
    const bool ok_alloc = tracker.allocator()->support_texture_binding();

    // check that the element count fits in a 1D texture
    const bool ok_count = (tracker.alloc_size() / type_size) < TEXTURE_BOUND_1D;

    if (ok_alloc && ok_count) {
      Impl::TextureAttribute * attr = new Impl::TextureAttribute( tracker.alloc_ptr(), tracker.alloc_size(), desc );
      tracker.set_attribute( attr );
    }
    else {
      std::ostringstream oss;
      oss << "Error: Cannot attach texture object";
      if (!ok_alloc) {
        oss << ", incompatible allocator " << tracker.allocator()->name();
      }
      if (!ok_count) {
        oss << ", array " << tracker.label() << " too large";
      }
      oss << ".";
      Kokkos::Impl::throw_runtime_exception( oss.str() );
    }
  }

  // An attribute of some other kind is already attached: refuse.
  if ( NULL == dynamic_cast<Impl::TextureAttribute *>(tracker.attribute()) ) {
    std::ostringstream oss;
    oss << "Error: Allocation " << tracker.label() << " already has an attribute attached.";
    Kokkos::Impl::throw_runtime_exception( oss.str() );
  }
}
|
||||||
|
|
||||||
|
} // unnamed namespace
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
// Build an AllocationTracker for `size` bytes labelled `label`, using this
// space's allocator.
Impl::AllocationTracker CudaSpace::allocate_and_track( const std::string & label, const size_t size )
{
  Impl::AllocationTracker tracker( allocator(), size, label );
  return tracker ;
}
|
||||||
|
|
||||||
|
// Attach a texture attribute to `tracker`; forwards to the file-local
// texture_object_attach_impl.
void CudaSpace::texture_object_attach( Impl::AllocationTracker const & tracker
                                     , unsigned type_size
                                     , ::cudaChannelFormatDesc const & desc
                                     )
{ texture_object_attach_impl( tracker, type_size, desc ); }
|
||||||
|
|
||||||
|
void CudaSpace::access_error()
|
||||||
|
{
|
||||||
|
const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg );
|
||||||
|
}
|
||||||
|
|
||||||
|
void CudaSpace::access_error( const void * const )
|
||||||
|
{
|
||||||
|
const std::string msg("Kokkos::CudaSpace::access_error attempt to execute Cuda function from non-Cuda space" );
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg );
|
||||||
|
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
// Build an AllocationTracker for `size` bytes labelled `label`, using this
// space's allocator.
Impl::AllocationTracker CudaUVMSpace::allocate_and_track( const std::string & label, const size_t size )
{
  Impl::AllocationTracker tracker( allocator(), size, label );
  return tracker ;
}
|
||||||
|
|
||||||
|
// Attach a texture attribute to `tracker`; forwards to the file-local
// texture_object_attach_impl.
void CudaUVMSpace::texture_object_attach( Impl::AllocationTracker const & tracker
                                        , unsigned type_size
                                        , ::cudaChannelFormatDesc const & desc
                                        )
{ texture_object_attach_impl( tracker, type_size, desc ); }
|
||||||
|
|
||||||
|
// Compile-time answer: true when built with CUDA_VERSION >= 6000 on a
// non-Apple platform, false otherwise.
bool CudaUVMSpace::available()
{
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION ) && !defined(__APPLE__)
  return true ;
#else
  return false ;
#endif
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
// Build an AllocationTracker for `size` bytes labelled `label`, using this
// space's allocator.
Impl::AllocationTracker CudaHostPinnedSpace::allocate_and_track( const std::string & label, const size_t size )
{
  Impl::AllocationTracker tracker( allocator(), size, label );
  return tracker ;
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
// Record the device id reported by a default-constructed Kokkos::Cuda.
CudaSpace::CudaSpace()
  : m_device( Kokkos::Cuda().cuda_device() )
{}
|
||||||
|
|
||||||
|
// Record the device id reported by a default-constructed Kokkos::Cuda.
CudaUVMSpace::CudaUVMSpace()
  : m_device( Kokkos::Cuda().cuda_device() )
{}
|
||||||
|
|
||||||
|
// Nothing to initialize.
CudaHostPinnedSpace::CudaHostPinnedSpace()
{}
|
||||||
|
|
||||||
|
void * CudaSpace::allocate( const size_t arg_alloc_size ) const
|
||||||
|
{
|
||||||
|
void * ptr = NULL;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaMalloc( &ptr, arg_alloc_size ) );
|
||||||
|
|
||||||
|
return ptr ;
|
||||||
|
}
|
||||||
|
|
||||||
|
void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const
|
||||||
|
{
|
||||||
|
void * ptr = NULL;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) );
|
||||||
|
|
||||||
|
return ptr ;
|
||||||
|
}
|
||||||
|
|
||||||
|
void * CudaHostPinnedSpace::allocate( const size_t arg_alloc_size ) const
|
||||||
|
{
|
||||||
|
void * ptr = NULL;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaHostAlloc( &ptr, arg_alloc_size , cudaHostAllocDefault ) );
|
||||||
|
|
||||||
|
return ptr ;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
|
||||||
|
} catch(...) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
|
||||||
|
} catch(...) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
CUDA_SAFE_CALL( cudaFreeHost( arg_alloc_ptr ) );
|
||||||
|
} catch(...) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
// Definitions of the per-memory-space root records; each is the `s_root_record`
// static member of the corresponding SharedAllocationRecord specialization.
SharedAllocationRecord< void , void > SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record ;

SharedAllocationRecord< void , void > SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record ;

SharedAllocationRecord< void , void > SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record ;
|
||||||
|
|
||||||
|
// Create a linear-memory texture object over [alloc_ptr, alloc_ptr+alloc_size).
//
//   sizeof_alias : element size in bytes; 4 selects an `int` channel format,
//                  8 selects `int2`, any other value is treated as 16 (`int4`)
//   alloc_ptr    : start of the memory to bind
//   alloc_size   : number of bytes to bind
//
// NOTE(review): an earlier comment here stated "Only valid for
// 300 <= __CUDA_ARCH__ otherwise return zero"; no such fallback is
// implemented in this function.
::cudaTextureObject_t
SharedAllocationRecord< Kokkos::CudaSpace , void >::
attach_texture_object( const unsigned sizeof_alias
                     , void * const   alloc_ptr
                     , size_t const   alloc_size )
{
  struct cudaResourceDesc resDesc ;
  struct cudaTextureDesc  texDesc ;

  memset( & resDesc , 0 , sizeof(resDesc) );
  memset( & texDesc , 0 , sizeof(texDesc) );

  resDesc.resType                = cudaResourceTypeLinear ;
  resDesc.res.linear.devPtr      = alloc_ptr ;
  resDesc.res.linear.sizeInBytes = alloc_size ;

  switch ( sizeof_alias ) {
  case 4 :
    resDesc.res.linear.desc = cudaCreateChannelDesc< int >();
    break ;
  case 8 :
    resDesc.res.linear.desc = cudaCreateChannelDesc< ::int2 >();
    break ;
  default : /* sizeof_alias == 16 */
    resDesc.res.linear.desc = cudaCreateChannelDesc< ::int4 >();
    break ;
  }

  ::cudaTextureObject_t tex_obj ;

  CUDA_SAFE_CALL( cudaCreateTextureObject( & tex_obj , & resDesc, & texDesc, NULL ) );

  return tex_obj ;
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_label() const
|
||||||
|
{
|
||||||
|
SharedAllocationHeader header ;
|
||||||
|
|
||||||
|
Kokkos::Impl::DeepCopy< Kokkos::HostSpace , Kokkos::CudaSpace >( & header , RecordBase::head() , sizeof(SharedAllocationHeader) );
|
||||||
|
|
||||||
|
return std::string( header.m_label );
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_label() const
|
||||||
|
{
|
||||||
|
return std::string( RecordBase::head()->m_label );
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_label() const
|
||||||
|
{
|
||||||
|
return std::string( RecordBase::head()->m_label );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a heap-allocated record owning arg_alloc_size bytes in arg_space,
// labelled arg_label.
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::
allocate( const Kokkos::CudaSpace & arg_space
        , const std::string       & arg_label
        , const size_t              arg_alloc_size
        )
{
  typedef SharedAllocationRecord< Kokkos::CudaSpace , void > record_type ;

  return new record_type( arg_space , arg_label , arg_alloc_size );
}
|
||||||
|
|
||||||
|
// Create a heap-allocated record owning arg_alloc_size bytes in arg_space,
// labelled arg_label.
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
allocate( const Kokkos::CudaUVMSpace & arg_space
        , const std::string          & arg_label
        , const size_t                 arg_alloc_size
        )
{
  typedef SharedAllocationRecord< Kokkos::CudaUVMSpace , void > record_type ;

  return new record_type( arg_space , arg_label , arg_alloc_size );
}
|
||||||
|
|
||||||
|
// Create a heap-allocated record owning arg_alloc_size bytes in arg_space,
// labelled arg_label.
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
allocate( const Kokkos::CudaHostPinnedSpace & arg_space
        , const std::string                 & arg_label
        , const size_t                        arg_alloc_size
        )
{
  typedef SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > record_type ;

  return new record_type( arg_space , arg_label , arg_alloc_size );
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SharedAllocationRecord< Kokkos::CudaSpace , void >::
|
||||||
|
deallocate( SharedAllocationRecord< void , void > * arg_rec )
|
||||||
|
{
|
||||||
|
delete static_cast<SharedAllocationRecord*>(arg_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
|
||||||
|
deallocate( SharedAllocationRecord< void , void > * arg_rec )
|
||||||
|
{
|
||||||
|
delete static_cast<SharedAllocationRecord*>(arg_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
|
||||||
|
deallocate( SharedAllocationRecord< void , void > * arg_rec )
|
||||||
|
{
|
||||||
|
delete static_cast<SharedAllocationRecord*>(arg_rec);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the whole allocation (header plus user memory) to the memory space.
SharedAllocationRecord< Kokkos::CudaSpace , void >::
~SharedAllocationRecord()
{
  m_space.deallocate( RecordBase::m_alloc_ptr , RecordBase::m_alloc_size );
}
|
||||||
|
|
||||||
|
// Return the whole allocation (header plus user memory) to the memory space.
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
~SharedAllocationRecord()
{
  m_space.deallocate( RecordBase::m_alloc_ptr , RecordBase::m_alloc_size );
}
|
||||||
|
|
||||||
|
// Return the whole allocation (header plus user memory) to the memory space.
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
~SharedAllocationRecord()
{
  m_space.deallocate( RecordBase::m_alloc_ptr , RecordBase::m_alloc_size );
}
|
||||||
|
|
||||||
|
// Allocate [ SharedAllocationHeader , user_memory ] in arg_space and register
// the record under this space's root record.
//
//   arg_space      : memory space performing the allocation
//   arg_label      : label stored in the header (truncated if too long)
//   arg_alloc_size : bytes of user memory requested
//   arg_dealloc    : deallocation function passed through to the base record
//
// The header is assembled in a host-side local and then copied to the
// allocation with DeepCopy<CudaSpace,HostSpace>.
SharedAllocationRecord< Kokkos::CudaSpace , void >::
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
                      , const std::string       & arg_label
                      , const size_t              arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::CudaSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_tex_obj( 0 )
  , m_space( arg_space )
{
  SharedAllocationHeader header ;

  // Fill in the Header information
  header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this );

  strncpy( header.m_label
         , arg_label.c_str()
         , SharedAllocationHeader::maximum_label_length
         );

  // strncpy does not terminate the destination when the source fills it;
  // force a terminator so the label can always be read back as a C string.
  // (assumes m_label holds at least maximum_label_length chars, as the
  //  strncpy bound above already requires)
  header.m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;

  // Copy to device memory
  Kokkos::Impl::DeepCopy<CudaSpace,HostSpace>( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) );
}
|
||||||
|
|
||||||
|
// Allocate [ SharedAllocationHeader , user_memory ] in arg_space and register
// the record under this space's root record.
//
//   arg_space      : memory space performing the allocation
//   arg_label      : label stored in the header (truncated if too long)
//   arg_alloc_size : bytes of user memory requested
//   arg_dealloc    : deallocation function passed through to the base record
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
                      , const std::string          & arg_label
                      , const size_t                 arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_tex_obj( 0 )
  , m_space( arg_space )
{
  // Fill in the Header information, directly accessible via UVM

  RecordBase::m_alloc_ptr->m_record = this ;

  strncpy( RecordBase::m_alloc_ptr->m_label
         , arg_label.c_str()
         , SharedAllocationHeader::maximum_label_length
         );

  // strncpy does not terminate the destination when the source fills it;
  // force a terminator so get_label() always reads a valid C string.
  // (assumes m_label holds at least maximum_label_length chars, as the
  //  strncpy bound above already requires)
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
}
|
||||||
|
|
||||||
|
// Allocate [ SharedAllocationHeader , user_memory ] in arg_space and register
// the record under this space's root record.
//
//   arg_space      : memory space performing the allocation
//   arg_label      : label stored in the header (truncated if too long)
//   arg_alloc_size : bytes of user memory requested
//   arg_dealloc    : deallocation function passed through to the base record
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
                      , const std::string                 & arg_label
                      , const size_t                        arg_alloc_size
                      , const SharedAllocationRecord< void , void >::function_type arg_dealloc
                      )
  // Pass through allocated [ SharedAllocationHeader , user_memory ]
  // Pass through deallocation function
  : SharedAllocationRecord< void , void >
      ( & SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::s_root_record
      , reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) )
      , sizeof(SharedAllocationHeader) + arg_alloc_size
      , arg_dealloc
      )
  , m_space( arg_space )
{
  // Fill in the Header information; the header is written directly from
  // host code.

  RecordBase::m_alloc_ptr->m_record = this ;

  strncpy( RecordBase::m_alloc_ptr->m_label
         , arg_label.c_str()
         , SharedAllocationHeader::maximum_label_length
         );

  // strncpy does not terminate the destination when the source fills it;
  // force a terminator so get_label() always reads a valid C string.
  // (assumes m_label holds at least maximum_label_length chars, as the
  //  strncpy bound above already requires)
  RecordBase::m_alloc_ptr->m_label[ SharedAllocationHeader::maximum_label_length - 1 ] = (char) 0 ;
}
|
||||||
|
|
||||||
|
// Find the record that owns the user pointer `alloc_ptr`.
//
// The record is located by searching the list rooted at s_root_record with
// SharedAllocationRecord<void,void>::find.  (A disabled alternative copied
// the SharedAllocationHeader preceding alloc_ptr back to the host and
// followed its m_record pointer.)  A runtime exception is raised when no
// record is found.
SharedAllocationRecord< Kokkos::CudaSpace , void > *
SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr )
{
  typedef SharedAllocationRecord< void , void >              base_record ;
  typedef SharedAllocationRecord< Kokkos::CudaSpace , void > cuda_record ;

  // Iterate the list to search for the record among all allocations
  cuda_record * const found =
    static_cast< cuda_record * >( base_record::find( & s_root_record , alloc_ptr ) );

  if ( found == 0 ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record ERROR" ) );
  }

  return found ;
}
|
||||||
|
|
||||||
|
// Find the record that owns the user pointer `alloc_ptr`.
//
// The SharedAllocationHeader is expected immediately before alloc_ptr; its
// m_record must point back at a record whose m_alloc_ptr is that header.
// A null alloc_ptr or a failed consistency check raises a runtime exception.
SharedAllocationRecord< Kokkos::CudaUVMSpace , void > *
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record( void * alloc_ptr )
{
  using Header     = SharedAllocationHeader ;
  using RecordCuda = SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;

  // Do not step back from (or dereference through) a null pointer.
  Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : nullptr ;

  if ( h == nullptr || h->m_record->m_alloc_ptr != h ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::get_record ERROR" ) );
  }

  return static_cast< RecordCuda * >( h->m_record );
}
|
||||||
|
|
||||||
|
// Find the record that owns the user pointer `alloc_ptr`.
//
// The SharedAllocationHeader is expected immediately before alloc_ptr; its
// m_record must point back at a record whose m_alloc_ptr is that header.
// A null alloc_ptr or a failed consistency check raises a runtime exception.
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > *
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record( void * alloc_ptr )
{
  using Header     = SharedAllocationHeader ;
  using RecordCuda = SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > ;

  // Do not step back from (or dereference through) a null pointer.
  Header * const h = alloc_ptr ? reinterpret_cast< Header * >( alloc_ptr ) - 1 : nullptr ;

  if ( h == nullptr || h->m_record->m_alloc_ptr != h ) {
    Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::get_record ERROR" ) );
  }

  return static_cast< RecordCuda * >( h->m_record );
}
|
||||||
|
|
||||||
|
// Iterate records to print orphaned memory ...
|
||||||
|
void
|
||||||
|
SharedAllocationRecord< Kokkos::CudaSpace , void >::
|
||||||
|
print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail )
|
||||||
|
{
|
||||||
|
SharedAllocationRecord< void , void > * r = & s_root_record ;
|
||||||
|
|
||||||
|
char buffer[256] ;
|
||||||
|
|
||||||
|
SharedAllocationHeader head ;
|
||||||
|
|
||||||
|
if ( detail ) {
|
||||||
|
do {
|
||||||
|
if ( r->m_alloc_ptr ) {
|
||||||
|
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
head.m_label[0] = 0 ;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf( buffer , 256 , "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"
|
||||||
|
, reinterpret_cast<unsigned long>( r )
|
||||||
|
, reinterpret_cast<unsigned long>( r->m_prev )
|
||||||
|
, reinterpret_cast<unsigned long>( r->m_next )
|
||||||
|
, reinterpret_cast<unsigned long>( r->m_alloc_ptr )
|
||||||
|
, r->m_alloc_size
|
||||||
|
, r->m_count
|
||||||
|
, reinterpret_cast<unsigned long>( r->m_dealloc )
|
||||||
|
, head.m_label
|
||||||
|
);
|
||||||
|
std::cout << buffer ;
|
||||||
|
r = r->m_next ;
|
||||||
|
} while ( r != & s_root_record );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
do {
|
||||||
|
if ( r->m_alloc_ptr ) {
|
||||||
|
|
||||||
|
Kokkos::Impl::DeepCopy<HostSpace,CudaSpace>::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) );
|
||||||
|
|
||||||
|
snprintf( buffer , 256 , "Cuda [ 0x%.12lx + %ld ] %s\n"
|
||||||
|
, reinterpret_cast< unsigned long >( r->data() )
|
||||||
|
, r->size()
|
||||||
|
, head.m_label
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
snprintf( buffer , 256 , "Cuda [ 0 + 0 ]\n" );
|
||||||
|
}
|
||||||
|
std::cout << buffer ;
|
||||||
|
r = r->m_next ;
|
||||||
|
} while ( r != & s_root_record );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SharedAllocationRecord< Kokkos::CudaUVMSpace , void >::
|
||||||
|
print_records( std::ostream & s , const Kokkos::CudaUVMSpace & space , bool detail )
|
||||||
|
{
|
||||||
|
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaUVM" , & s_root_record , detail );
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >::
|
||||||
|
print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bool detail )
|
||||||
|
{
|
||||||
|
SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaHostPinned" , & s_root_record , detail );
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Experimental
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace {
|
||||||
|
// Zero the lock table: each thread clears the entry at its global index,
// provided that index is below CUDA_SPACE_ATOMIC_MASK+1.
__global__ void init_lock_array_kernel() {
  const unsigned entry = blockIdx.x*blockDim.x + threadIdx.x;

  if ( entry<CUDA_SPACE_ATOMIC_MASK+1 ) {
    kokkos_impl_cuda_atomic_lock_array[entry] = 0;
  }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
int* lock_array_cuda_space_ptr(bool deallocate) {
|
||||||
|
static int* ptr = NULL;
|
||||||
|
if(deallocate) {
|
||||||
|
cudaFree(ptr);
|
||||||
|
ptr = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ptr==NULL && !deallocate)
|
||||||
|
cudaMalloc(&ptr,sizeof(int)*(CUDA_SPACE_ATOMIC_MASK+1));
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void init_lock_array_cuda_space() {
|
||||||
|
int is_initialized = 0;
|
||||||
|
if(! is_initialized) {
|
||||||
|
int* lock_array_ptr = lock_array_cuda_space_ptr();
|
||||||
|
cudaMemcpyToSymbol( kokkos_impl_cuda_atomic_lock_array , & lock_array_ptr , sizeof(int*) );
|
||||||
|
init_lock_array_kernel<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
183
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp
Executable file
183
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp
Executable file
@ -0,0 +1,183 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
|
||||||
|
#define KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class DestructFunctor >
|
||||||
|
SharedAllocationRecord *
|
||||||
|
shared_allocation_record( Kokkos::CudaSpace const & arg_space
|
||||||
|
, void * const arg_alloc_ptr
|
||||||
|
, DestructFunctor const & arg_destruct )
|
||||||
|
{
|
||||||
|
SharedAllocationRecord * const record = SharedAllocationRecord::get_record( arg_alloc_ptr );
|
||||||
|
|
||||||
|
// assert: record != 0
|
||||||
|
|
||||||
|
// assert: sizeof(DestructFunctor) <= record->m_destruct_size
|
||||||
|
|
||||||
|
// assert: record->m_destruct_function == 0
|
||||||
|
|
||||||
|
DestructFunctor * const functor =
|
||||||
|
reinterpret_cast< DestructFunctor * >(
|
||||||
|
reinterpret_cast< unsigned long >( record ) + sizeof(SharedAllocationRecord) );
|
||||||
|
|
||||||
|
new( functor ) DestructFunctor( arg_destruct );
|
||||||
|
|
||||||
|
record->m_destruct_functor = & shared_allocation_destroy< DestructFunctor > ;
|
||||||
|
|
||||||
|
return record ;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// class CudaUnmanagedAllocator
|
||||||
|
/// does nothing when deallocate(ptr,size) is called
|
||||||
|
struct CudaUnmanagedAllocator
{
  /// Label identifying this allocator.
  static const char * name() { return "Cuda Unmanaged Allocator"; }

  /// Intentionally empty: both arguments are ignored.
  static void deallocate( void * /*ptr*/ , size_t /*size*/ ) {}

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaUnmanagedUVMAllocator
|
||||||
|
/// does nothing when deallocate(ptr,size) is called
|
||||||
|
struct CudaUnmanagedUVMAllocator
{
  /// Label identifying this allocator.
  static const char * name() { return "Cuda Unmanaged UVM Allocator"; }

  /// Intentionally empty: both arguments are ignored.
  static void deallocate( void * /*ptr*/ , size_t /*size*/ ) {}

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaUnmanagedHostAllocator
|
||||||
|
/// does nothing when deallocate(ptr,size) is called
|
||||||
|
class CudaUnmanagedHostAllocator
{
public:

  /// Label identifying this allocator.
  static const char * name() { return "Cuda Unmanaged Host Allocator"; }

  /// Unmanaged deallocate does nothing: both arguments are ignored.
  static void deallocate( void * /*ptr*/ , size_t /*size*/ ) {}
};
|
||||||
|
|
||||||
|
/// class CudaMallocAllocator
|
||||||
|
class CudaMallocAllocator
{
public:

  /// Label identifying this allocator.
  static const char * name() { return "Cuda Malloc Allocator"; }

  // The three memory operations are only declared here.
  static void * allocate( size_t size );
  static void   deallocate( void * ptr , size_t );
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaUVMAllocator
|
||||||
|
class CudaUVMAllocator
{
public:

  /// Label identifying this allocator.
  static const char * name() { return "Cuda UVM Allocator"; }

  // The three memory operations are only declared here.
  static void * allocate( size_t size );
  static void   deallocate( void * ptr , size_t );
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Always reports true.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaHostAllocator
|
||||||
|
class CudaHostAllocator
{
public:

  /// Label identifying this allocator.
  static const char * name() { return "Cuda Host Allocator"; }

  // The three memory operations are only declared here.
  static void * allocate( size_t size );
  static void   deallocate( void * ptr , size_t );
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );
};
|
||||||
|
|
||||||
|
|
||||||
|
}} // namespace Kokkos::Impl
|
||||||
|
|
||||||
|
#endif //KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
|
||||||
|
|
||||||
192
lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
Executable file
192
lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.cpp
Executable file
@ -0,0 +1,192 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
// Create a texture object over a linear allocation.
//
//   alloc_ptr   stored as the linear resource's device pointer
//   alloc_size  stored as the linear resource's size in bytes
//   desc        channel format stored in the linear resource
//
// The device is synchronized before and after the texture object is
// created; a failing cudaCreateTextureObject goes through CUDA_SAFE_CALL.
TextureAttribute::TextureAttribute( void * const alloc_ptr
                                  , size_t alloc_size
                                  , cudaChannelFormatDesc const & desc
                                  )
  : m_tex_obj(0)
{
  cuda_device_synchronize();

  // Texture descriptor: all fields zero.
  struct cudaTextureDesc texDesc ;
  memset( & texDesc , 0 , sizeof(texDesc) );

  // Resource descriptor: zeroed, then filled in as a linear resource.
  struct cudaResourceDesc resDesc ;
  memset( & resDesc , 0 , sizeof(resDesc) );

  resDesc.resType                = cudaResourceTypeLinear ;
  resDesc.res.linear.devPtr      = alloc_ptr ;
  resDesc.res.linear.sizeInBytes = alloc_size ;
  resDesc.res.linear.desc        = desc ;

  CUDA_SAFE_CALL( cudaCreateTextureObject( & m_tex_obj , & resDesc, & texDesc, NULL) );

  cuda_device_synchronize();
}
|
||||||
|
|
||||||
|
|
||||||
|
// Destroy the texture object, if one is held.  The return code of
// cudaDestroyTextureObject is not inspected.
TextureAttribute::~TextureAttribute()
{
  if ( ! m_tex_obj ) return ;

  cudaDestroyTextureObject( m_tex_obj );
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
void * CudaMallocAllocator::allocate( size_t size )
|
||||||
|
{
|
||||||
|
void * ptr = NULL;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaMalloc( &ptr, size ) );
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CudaMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
CUDA_SAFE_CALL( cudaFree( ptr ) );
|
||||||
|
} catch(...) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
void * CudaMallocAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
|
||||||
|
{
|
||||||
|
void * ptr = old_ptr;
|
||||||
|
if (old_size != new_size) {
|
||||||
|
ptr = allocate( new_size );
|
||||||
|
size_t copy_size = old_size < new_size ? old_size : new_size;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );
|
||||||
|
|
||||||
|
deallocate( old_ptr, old_size );
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
void * CudaUVMAllocator::allocate( size_t size )
|
||||||
|
{
|
||||||
|
#if defined( CUDA_VERSION ) && ( 6000 <= CUDA_VERSION )
|
||||||
|
void * ptr = NULL;
|
||||||
|
CUDA_SAFE_CALL( cudaMallocManaged( &ptr, size, cudaMemAttachGlobal ) );
|
||||||
|
return ptr;
|
||||||
|
#else
|
||||||
|
throw_runtime_exception( "CUDA VERSION does not support UVM" );
|
||||||
|
return NULL;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void CudaUVMAllocator::deallocate( void * ptr, size_t /*size*/ )
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
CUDA_SAFE_CALL( cudaFree( ptr ) );
|
||||||
|
} catch(...) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
void * CudaUVMAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
|
||||||
|
{
|
||||||
|
void * ptr = old_ptr;
|
||||||
|
if (old_size != new_size) {
|
||||||
|
ptr = allocate( new_size );
|
||||||
|
size_t copy_size = old_size < new_size ? old_size : new_size;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyDefault ) );
|
||||||
|
|
||||||
|
deallocate( old_ptr, old_size );
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
void * CudaHostAllocator::allocate( size_t size )
|
||||||
|
{
|
||||||
|
void * ptr = NULL;
|
||||||
|
CUDA_SAFE_CALL( cudaHostAlloc( &ptr , size , cudaHostAllocDefault ) );
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CudaHostAllocator::deallocate( void * ptr, size_t /*size*/ )
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
CUDA_SAFE_CALL( cudaFreeHost( ptr ) );
|
||||||
|
} catch(...) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
void * CudaHostAllocator::reallocate(void * old_ptr, size_t old_size, size_t new_size)
|
||||||
|
{
|
||||||
|
void * ptr = old_ptr;
|
||||||
|
if (old_size != new_size) {
|
||||||
|
ptr = allocate( new_size );
|
||||||
|
size_t copy_size = old_size < new_size ? old_size : new_size;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaMemcpy( ptr , old_ptr , copy_size , cudaMemcpyHostToHost ) );
|
||||||
|
|
||||||
|
deallocate( old_ptr, old_size );
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
}} // namespace Kokkos::Impl
|
||||||
|
|
||||||
|
#endif //KOKKOS_HAVE_CUDA
|
||||||
187
lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.hpp
Executable file
187
lib/kokkos/core/src/Cuda/Kokkos_Cuda_BasicAllocators.hpp
Executable file
@ -0,0 +1,187 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
|
||||||
|
#define KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp> // AllocatorAttributeBase
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
|
||||||
|
// Cuda 5.0 <texture_types.h> defines 'cudaTextureObject_t'
|
||||||
|
// to be an 'unsigned long long'. This could change with
|
||||||
|
// a future version of Cuda and this typedef would have to
|
||||||
|
// change accordingly.
|
||||||
|
|
||||||
|
#if defined( CUDA_VERSION ) && ( 5000 <= CUDA_VERSION )

// Alias for ::cudaTextureObject_t, selected through enable_if on the
// condition that it has the same size as a 'const void *'.
// NOTE(review): presumably enable_if<false,...> has no nested 'type', which
// would make this a compile-time size check -- confirm against
// impl/Kokkos_Traits.hpp.
typedef enable_if< sizeof(::cudaTextureObject_t) == sizeof(const void *)
                 , ::cudaTextureObject_t
                 >::type cuda_texture_object_type ;

#else

// Fallback when CUDA_VERSION is undefined or below 5000.
typedef const void * cuda_texture_object_type ;

#endif
|
||||||
|
|
||||||
|
|
||||||
|
struct TextureAttribute : public AllocatorAttributeBase
|
||||||
|
{
|
||||||
|
cuda_texture_object_type m_tex_obj ;
|
||||||
|
|
||||||
|
TextureAttribute( void * const alloc_ptr
|
||||||
|
, size_t alloc_size
|
||||||
|
, cudaChannelFormatDesc const & desc
|
||||||
|
);
|
||||||
|
|
||||||
|
~TextureAttribute();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/// class CudaUnmanagedAllocator
|
||||||
|
/// does nothing when deallocate(ptr,size) is called
|
||||||
|
struct CudaUnmanagedAllocator
{
  /// Returns the fixed label of this allocator.
  static const char * name() { return "Cuda Unmanaged Allocator"; }

  /// Empty body: neither argument is used.
  static void deallocate( void * /*ptr*/ , size_t /*size*/ ) {}

  /// Returns true unconditionally.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaUnmanagedUVMAllocator
|
||||||
|
/// does nothing when deallocate(ptr,size) is called
|
||||||
|
struct CudaUnmanagedUVMAllocator
{
  /// Returns the fixed label of this allocator.
  static const char * name() { return "Cuda Unmanaged UVM Allocator"; }

  /// Empty body: neither argument is used.
  static void deallocate( void * /*ptr*/ , size_t /*size*/ ) {}

  /// Returns true unconditionally.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaUnmanagedHostAllocator
|
||||||
|
/// does nothing when deallocate(ptr,size) is called
|
||||||
|
class CudaUnmanagedHostAllocator
{
public:

  /// Returns the fixed label of this allocator.
  static const char * name() { return "Cuda Unmanaged Host Allocator"; }

  /// Unmanaged deallocate does nothing: neither argument is used.
  static void deallocate( void * /*ptr*/ , size_t /*size*/ ) {}
};
|
||||||
|
|
||||||
|
/// class CudaMallocAllocator
|
||||||
|
class CudaMallocAllocator
{
public:

  /// Returns the fixed label of this allocator.
  static const char * name() { return "Cuda Malloc Allocator"; }

  // Memory operations; declared here only.
  static void * allocate( size_t size );
  static void   deallocate( void * ptr , size_t );
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Returns true unconditionally.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaUVMAllocator
|
||||||
|
class CudaUVMAllocator
{
public:

  /// Returns the fixed label of this allocator.
  static const char * name() { return "Cuda UVM Allocator"; }

  // Memory operations; declared here only.
  static void * allocate( size_t size );
  static void   deallocate( void * ptr , size_t );
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );

  /// Returns true unconditionally.
  static bool support_texture_binding() { return true; }
};
|
||||||
|
|
||||||
|
/// class CudaHostAllocator
|
||||||
|
class CudaHostAllocator
{
public:

  /// Returns the fixed label of this allocator.
  static const char * name() { return "Cuda Host Allocator"; }

  // Memory operations; declared here only.
  static void * allocate( size_t size );
  static void   deallocate( void * ptr , size_t );
  static void * reallocate( void * old_ptr , size_t old_size , size_t new_size );
};
|
||||||
|
|
||||||
|
|
||||||
|
}} // namespace Kokkos::Impl
|
||||||
|
|
||||||
|
#endif //KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#endif //KOKKOS_CUDA_BASIC_ALLOCATORS_HPP
|
||||||
69
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp
Executable file
69
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp
Executable file
@ -0,0 +1,69 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_ERROR_HPP
|
||||||
|
#define KOKKOS_CUDA_ERROR_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
void cuda_device_synchronize();
|
||||||
|
|
||||||
|
void cuda_internal_error_throw( cudaError e , const char * name, const char * file = NULL, const int line = 0 );
|
||||||
|
|
||||||
|
// Check a CUDA return code.  Returns silently on cudaSuccess; any other
// value is forwarded, together with the call text and the optional source
// location, to cuda_internal_error_throw().
inline void cuda_internal_safe_call( cudaError e , const char * name, const char * file = NULL, const int line = 0)
{
  if ( e == cudaSuccess ) return ;

  cuda_internal_error_throw( e , name, file, line );
}
|
||||||
|
|
||||||
|
// Evaluate 'call' and pass its result to cuda_internal_safe_call together
// with the stringified call text and the current file and line.
#define CUDA_SAFE_CALL( call )  \
  Kokkos::Impl::cuda_internal_safe_call( call , #call, __FILE__, __LINE__ )
|
||||||
|
|
||||||
|
}} // namespace Kokkos::Impl
|
||||||
|
|
||||||
|
#endif //KOKKOS_HAVE_CUDA
|
||||||
|
#endif //KOKKOS_CUDA_ERROR_HPP
|
||||||
678
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
Executable file
678
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
Executable file
@ -0,0 +1,678 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* Kokkos interfaces */
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_Internal.hpp>
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* Standard 'C' libraries */
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Standard 'C++' libraries */
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
// Definition of the constant-memory buffer; only emitted when relocatable
// device code is enabled.
__device__ __constant__ Kokkos::Impl::CudaTraits::ConstantGlobalBufferType kokkos_impl_cuda_constant_memory_buffer ;
#endif
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Kernel: store the value of __CUDA_ARCH__ seen by this compilation pass
// into *d_arch, or 0 when that macro is not defined.
__global__
void query_cuda_kernel_arch( int * d_arch )
{
#if defined( __CUDA_ARCH__ )
  const int compiled_arch = __CUDA_ARCH__ ;
#else
  const int compiled_arch = 0 ;
#endif

  *d_arch = compiled_arch ;
}
|
||||||
|
|
||||||
|
/** Query what compute capability is actually launched to the device: */
|
||||||
|
int cuda_kernel_arch()
|
||||||
|
{
|
||||||
|
int * d_arch = 0 ;
|
||||||
|
cudaMalloc( (void **) & d_arch , sizeof(int) );
|
||||||
|
query_cuda_kernel_arch<<<1,1>>>( d_arch );
|
||||||
|
int arch = 0 ;
|
||||||
|
cudaMemcpy( & arch , d_arch , sizeof(int) , cudaMemcpyDefault );
|
||||||
|
cudaFree( d_arch );
|
||||||
|
return arch ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report whether the environment variable CUDA_LAUNCH_BLOCKING is set to a
// value that atoi() parses as non-zero.  An unset variable yields false.
bool cuda_launch_blocking()
{
  const char * const setting = getenv("CUDA_LAUNCH_BLOCKING");

  return ( setting != 0 ) && ( atoi( setting ) != 0 );
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void cuda_device_synchronize()
|
||||||
|
{
|
||||||
|
// static const bool launch_blocking = cuda_launch_blocking();
|
||||||
|
|
||||||
|
// if (!launch_blocking) {
|
||||||
|
CUDA_SAFE_CALL( cudaDeviceSynchronize() );
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build a diagnostic for CUDA error 'e' and hand it to
// throw_runtime_exception().
//
// Message layout: "<name> error( <error name>): <error string>", followed
// by " <file>:<line>" when 'file' is non-null.
void cuda_internal_error_throw( cudaError e , const char * name, const char * file, const int line )
{
  std::ostringstream out ;

  out << name ;
  out << " error( " << cudaGetErrorName(e) << "): " ;
  out << cudaGetErrorString(e);

  // The source location is appended only when the caller supplied one.
  if ( file != 0 ) {
    out << " " << file << ":" << line;
  }

  throw_runtime_exception( out.str() );
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Some significant cuda device properties:
|
||||||
|
//
|
||||||
|
// cudaDeviceProp::name : Text label for device
|
||||||
|
// cudaDeviceProp::major : Device major number
|
||||||
|
// cudaDeviceProp::minor : Device minor number
|
||||||
|
// cudaDeviceProp::warpSize : number of threads per warp
|
||||||
|
// cudaDeviceProp::multiProcessorCount : number of multiprocessors
|
||||||
|
// cudaDeviceProp::sharedMemPerBlock : capacity of shared memory per block
|
||||||
|
// cudaDeviceProp::totalConstMem : capacity of constant memory
|
||||||
|
// cudaDeviceProp::totalGlobalMem : capacity of global memory
|
||||||
|
// cudaDeviceProp::maxGridSize[3] : maximum grid size
|
||||||
|
|
||||||
|
//
|
||||||
|
// Section 4.4.2.4 of the CUDA Toolkit Reference Manual
|
||||||
|
//
|
||||||
|
// struct cudaDeviceProp {
|
||||||
|
// char name[256];
|
||||||
|
// size_t totalGlobalMem;
|
||||||
|
// size_t sharedMemPerBlock;
|
||||||
|
// int regsPerBlock;
|
||||||
|
// int warpSize;
|
||||||
|
// size_t memPitch;
|
||||||
|
// int maxThreadsPerBlock;
|
||||||
|
// int maxThreadsDim[3];
|
||||||
|
// int maxGridSize[3];
|
||||||
|
// size_t totalConstMem;
|
||||||
|
// int major;
|
||||||
|
// int minor;
|
||||||
|
// int clockRate;
|
||||||
|
// size_t textureAlignment;
|
||||||
|
// int deviceOverlap;
|
||||||
|
// int multiProcessorCount;
|
||||||
|
// int kernelExecTimeoutEnabled;
|
||||||
|
// int integrated;
|
||||||
|
// int canMapHostMemory;
|
||||||
|
// int computeMode;
|
||||||
|
// int concurrentKernels;
|
||||||
|
// int ECCEnabled;
|
||||||
|
// int pciBusID;
|
||||||
|
// int pciDeviceID;
|
||||||
|
// int tccDriver;
|
||||||
|
// int asyncEngineCount;
|
||||||
|
// int unifiedAddressing;
|
||||||
|
// int memoryClockRate;
|
||||||
|
// int memoryBusWidth;
|
||||||
|
// int l2CacheSize;
|
||||||
|
// int maxThreadsPerMultiProcessor;
|
||||||
|
// };
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CudaInternalDevices {
|
||||||
|
public:
|
||||||
|
enum { MAXIMUM_DEVICE_COUNT = 8 };
|
||||||
|
struct cudaDeviceProp m_cudaProp[ MAXIMUM_DEVICE_COUNT ] ;
|
||||||
|
int m_cudaDevCount ;
|
||||||
|
|
||||||
|
CudaInternalDevices();
|
||||||
|
|
||||||
|
static const CudaInternalDevices & singleton();
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Query the number of CUDA devices and cache the properties of each one.
 *
 *  Throws (via CUDA_SAFE_CALL) if a CUDA runtime query fails, and throws a
 *  runtime exception if more devices are present than the fixed-size
 *  property table can hold.
 */
CudaInternalDevices::CudaInternalDevices()
{
  // See 'cudaSetDeviceFlags' for host-device thread interaction
  // Section 4.4.2.6 of the CUDA Toolkit Reference Manual

  CUDA_SAFE_CALL (cudaGetDeviceCount( & m_cudaDevCount ) );

  // m_cudaProp has room for MAXIMUM_DEVICE_COUNT entries only; refuse to
  // continue rather than let cudaGetDeviceProperties write past the array.
  if ( int(MAXIMUM_DEVICE_COUNT) < m_cudaDevCount ) {
    std::ostringstream msg ;
    msg << "Kokkos::Cuda ERROR : detected " << m_cudaDevCount
        << " devices but at most " << int(MAXIMUM_DEVICE_COUNT)
        << " are supported" ;
    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  for ( int i = 0 ; i < m_cudaDevCount ; ++i ) {
    CUDA_SAFE_CALL( cudaGetDeviceProperties( m_cudaProp + i , i ) );
  }
}
|
||||||
|
|
||||||
|
/** Return the single CudaInternalDevices instance, constructed on first use. */
const CudaInternalDevices & CudaInternalDevices::singleton()
{
  static CudaInternalDevices instance ;

  return instance ;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class CudaInternal {
|
||||||
|
private:
|
||||||
|
|
||||||
|
CudaInternal( const CudaInternal & );
|
||||||
|
CudaInternal & operator = ( const CudaInternal & );
|
||||||
|
|
||||||
|
AllocationTracker m_scratchFlagsTracker;
|
||||||
|
AllocationTracker m_scratchSpaceTracker;
|
||||||
|
AllocationTracker m_scratchUnifiedTracker;
|
||||||
|
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Cuda::size_type size_type ;
|
||||||
|
|
||||||
|
int m_cudaDev ;
|
||||||
|
int m_cudaArch ;
|
||||||
|
unsigned m_maxWarpCount ;
|
||||||
|
unsigned m_maxBlock ;
|
||||||
|
unsigned m_maxSharedWords ;
|
||||||
|
size_type m_scratchSpaceCount ;
|
||||||
|
size_type m_scratchFlagsCount ;
|
||||||
|
size_type m_scratchUnifiedCount ;
|
||||||
|
size_type m_scratchUnifiedSupported ;
|
||||||
|
size_type m_streamCount ;
|
||||||
|
size_type * m_scratchSpace ;
|
||||||
|
size_type * m_scratchFlags ;
|
||||||
|
size_type * m_scratchUnified ;
|
||||||
|
cudaStream_t * m_stream ;
|
||||||
|
|
||||||
|
|
||||||
|
static CudaInternal & singleton();
|
||||||
|
|
||||||
|
int verify_is_initialized( const char * const label ) const ;
|
||||||
|
|
||||||
|
int is_initialized() const
|
||||||
|
{ return 0 != m_scratchSpace && 0 != m_scratchFlags ; }
|
||||||
|
|
||||||
|
void initialize( int cuda_device_id , int stream_count );
|
||||||
|
void finalize();
|
||||||
|
|
||||||
|
void print_configuration( std::ostream & ) const ;
|
||||||
|
|
||||||
|
~CudaInternal();
|
||||||
|
|
||||||
|
CudaInternal()
|
||||||
|
: m_cudaDev( -1 )
|
||||||
|
, m_cudaArch( -1 )
|
||||||
|
, m_maxWarpCount( 0 )
|
||||||
|
, m_maxBlock( 0 )
|
||||||
|
, m_maxSharedWords( 0 )
|
||||||
|
, m_scratchSpaceCount( 0 )
|
||||||
|
, m_scratchFlagsCount( 0 )
|
||||||
|
, m_scratchUnifiedCount( 0 )
|
||||||
|
, m_scratchUnifiedSupported( 0 )
|
||||||
|
, m_streamCount( 0 )
|
||||||
|
, m_scratchSpace( 0 )
|
||||||
|
, m_scratchFlags( 0 )
|
||||||
|
, m_scratchUnified( 0 )
|
||||||
|
, m_stream( 0 )
|
||||||
|
{}
|
||||||
|
|
||||||
|
size_type * scratch_space( const size_type size );
|
||||||
|
size_type * scratch_flags( const size_type size );
|
||||||
|
size_type * scratch_unified( const size_type size );
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
void CudaInternal::print_configuration( std::ostream & s ) const
|
||||||
|
{
|
||||||
|
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
|
||||||
|
#endif
|
||||||
|
#if defined( CUDA_VERSION )
|
||||||
|
s << "macro CUDA_VERSION = " << CUDA_VERSION
|
||||||
|
<< " = version " << CUDA_VERSION / 1000
|
||||||
|
<< "." << ( CUDA_VERSION % 1000 ) / 10
|
||||||
|
<< std::endl ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for ( int i = 0 ; i < dev_info.m_cudaDevCount ; ++i ) {
|
||||||
|
s << "Kokkos::Cuda[ " << i << " ] "
|
||||||
|
<< dev_info.m_cudaProp[i].name
|
||||||
|
<< " capability " << dev_info.m_cudaProp[i].major << "." << dev_info.m_cudaProp[i].minor
|
||||||
|
<< ", Total Global Memory: " << human_memory_size(dev_info.m_cudaProp[i].totalGlobalMem)
|
||||||
|
<< ", Shared Memory per Block: " << human_memory_size(dev_info.m_cudaProp[i].sharedMemPerBlock);
|
||||||
|
if ( m_cudaDev == i ) s << " : Selected" ;
|
||||||
|
s << std::endl ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Complain on std::cerr if any stream or scratch buffer is still held
 *  (finalize() was not called), then reset every member.
 */
CudaInternal::~CudaInternal()
{
  const bool still_holding_resources =
    m_stream || m_scratchSpace || m_scratchFlags || m_scratchUnified ;

  if ( still_holding_resources ) {
    std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()"
              << std::endl ;
    std::cerr.flush();
  }

  // Device selection
  m_cudaDev  = -1 ;
  m_cudaArch = -1 ;

  // Launch limits
  m_maxWarpCount   = 0 ;
  m_maxBlock       = 0 ;
  m_maxSharedWords = 0 ;

  // Scratch and stream bookkeeping
  m_scratchSpaceCount       = 0 ;
  m_scratchFlagsCount       = 0 ;
  m_scratchUnifiedCount     = 0 ;
  m_scratchUnifiedSupported = 0 ;
  m_streamCount             = 0 ;

  // Pointers are only cleared here, nothing is freed.
  m_scratchSpace   = 0 ;
  m_scratchFlags   = 0 ;
  m_scratchUnified = 0 ;
  m_stream         = 0 ;
}
|
||||||
|
|
||||||
|
int CudaInternal::verify_is_initialized( const char * const label ) const
|
||||||
|
{
|
||||||
|
if ( m_cudaDev < 0 ) {
|
||||||
|
std::cerr << "Kokkos::Cuda::" << label << " : ERROR device not initialized" << std::endl ;
|
||||||
|
}
|
||||||
|
return 0 <= m_cudaDev ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the single CudaInternal instance, constructed on first use. */
CudaInternal & CudaInternal::singleton()
{
  static CudaInternal instance ;

  return instance ;
}
|
||||||
|
|
||||||
|
void CudaInternal::initialize( int cuda_device_id , int stream_count )
|
||||||
|
{
|
||||||
|
enum { WordSize = sizeof(size_type) };
|
||||||
|
|
||||||
|
if ( ! HostSpace::execution_space::is_initialized() ) {
|
||||||
|
const std::string msg("Cuda::initialize ERROR : HostSpace::execution_space is not initialized");
|
||||||
|
throw_runtime_exception( msg );
|
||||||
|
}
|
||||||
|
|
||||||
|
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
|
||||||
|
|
||||||
|
const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ;
|
||||||
|
|
||||||
|
const bool ok_id = 0 <= cuda_device_id &&
|
||||||
|
cuda_device_id < dev_info.m_cudaDevCount ;
|
||||||
|
|
||||||
|
// Need device capability 2.0 or better
|
||||||
|
|
||||||
|
const bool ok_dev = ok_id &&
|
||||||
|
( 2 <= dev_info.m_cudaProp[ cuda_device_id ].major &&
|
||||||
|
0 <= dev_info.m_cudaProp[ cuda_device_id ].minor );
|
||||||
|
|
||||||
|
if ( ok_init && ok_dev ) {
|
||||||
|
|
||||||
|
const struct cudaDeviceProp & cudaProp =
|
||||||
|
dev_info.m_cudaProp[ cuda_device_id ];
|
||||||
|
|
||||||
|
m_cudaDev = cuda_device_id ;
|
||||||
|
|
||||||
|
CUDA_SAFE_CALL( cudaSetDevice( m_cudaDev ) );
|
||||||
|
CUDA_SAFE_CALL( cudaDeviceReset() );
|
||||||
|
Kokkos::Impl::cuda_device_synchronize();
|
||||||
|
|
||||||
|
// Query what compute capability architecture a kernel executes:
|
||||||
|
m_cudaArch = cuda_kernel_arch();
|
||||||
|
|
||||||
|
if ( m_cudaArch != cudaProp.major * 100 + cudaProp.minor * 10 ) {
|
||||||
|
std::cerr << "Kokkos::Cuda::initialize WARNING: running kernels compiled for compute capability "
|
||||||
|
<< ( m_cudaArch / 100 ) << "." << ( ( m_cudaArch % 100 ) / 10 )
|
||||||
|
<< " on device with compute capability "
|
||||||
|
<< cudaProp.major << "." << cudaProp.minor
|
||||||
|
<< " , this will likely reduce potential performance."
|
||||||
|
<< std::endl ;
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------
|
||||||
|
// Maximum number of warps,
|
||||||
|
// at most one warp per thread in a warp for reduction.
|
||||||
|
|
||||||
|
// HCE 2012-February :
|
||||||
|
// Found bug in CUDA 4.1 that sometimes a kernel launch would fail
|
||||||
|
// if the thread count == 1024 and a functor is passed to the kernel.
|
||||||
|
// Copying the kernel to constant memory and then launching with
|
||||||
|
// thread count == 1024 would work fine.
|
||||||
|
//
|
||||||
|
// HCE 2012-October :
|
||||||
|
// All compute capabilities support at least 16 warps (512 threads).
|
||||||
|
// However, we have found that 8 warps typically gives better performance.
|
||||||
|
|
||||||
|
m_maxWarpCount = 8 ;
|
||||||
|
|
||||||
|
// m_maxWarpCount = cudaProp.maxThreadsPerBlock / Impl::CudaTraits::WarpSize ;
|
||||||
|
|
||||||
|
if ( Impl::CudaTraits::WarpSize < m_maxWarpCount ) {
|
||||||
|
m_maxWarpCount = Impl::CudaTraits::WarpSize ;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_maxSharedWords = cudaProp.sharedMemPerBlock / WordSize ;
|
||||||
|
|
||||||
|
//----------------------------------
|
||||||
|
// Maximum number of blocks:
|
||||||
|
|
||||||
|
m_maxBlock = m_cudaArch < 300 ? 65535 : cudaProp.maxGridSize[0] ;
|
||||||
|
|
||||||
|
//----------------------------------
|
||||||
|
|
||||||
|
m_scratchUnifiedSupported = cudaProp.unifiedAddressing ;
|
||||||
|
|
||||||
|
if ( ! m_scratchUnifiedSupported ) {
|
||||||
|
std::cout << "Kokkos::Cuda device "
|
||||||
|
<< cudaProp.name << " capability "
|
||||||
|
<< cudaProp.major << "." << cudaProp.minor
|
||||||
|
<< " does not support unified virtual address space"
|
||||||
|
<< std::endl ;
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------
|
||||||
|
// Multiblock reduction uses scratch flags for counters
|
||||||
|
// and scratch space for partial reduction values.
|
||||||
|
// Allocate some initial space. This will grow as needed.
|
||||||
|
|
||||||
|
{
|
||||||
|
const unsigned reduce_block_count = m_maxWarpCount * Impl::CudaTraits::WarpSize ;
|
||||||
|
|
||||||
|
(void) scratch_unified( 16 * sizeof(size_type) );
|
||||||
|
(void) scratch_flags( reduce_block_count * 2 * sizeof(size_type) );
|
||||||
|
(void) scratch_space( reduce_block_count * 16 * sizeof(size_type) );
|
||||||
|
}
|
||||||
|
//----------------------------------
|
||||||
|
|
||||||
|
if ( stream_count ) {
|
||||||
|
m_stream = (cudaStream_t*) ::malloc( stream_count * sizeof(cudaStream_t) );
|
||||||
|
m_streamCount = stream_count ;
|
||||||
|
for ( size_type i = 0 ; i < m_streamCount ; ++i ) m_stream[i] = 0 ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
|
||||||
|
std::ostringstream msg ;
|
||||||
|
msg << "Kokkos::Cuda::initialize(" << cuda_device_id << ") FAILED" ;
|
||||||
|
|
||||||
|
if ( ! ok_init ) {
|
||||||
|
msg << " : Already initialized" ;
|
||||||
|
}
|
||||||
|
if ( ! ok_id ) {
|
||||||
|
msg << " : Device identifier out of range "
|
||||||
|
<< "[0.." << dev_info.m_cudaDevCount << "]" ;
|
||||||
|
}
|
||||||
|
else if ( ! ok_dev ) {
|
||||||
|
msg << " : Device " ;
|
||||||
|
msg << dev_info.m_cudaProp[ cuda_device_id ].major ;
|
||||||
|
msg << "." ;
|
||||||
|
msg << dev_info.m_cudaProp[ cuda_device_id ].minor ;
|
||||||
|
msg << " has insufficient capability, required 2.0 or better" ;
|
||||||
|
}
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the array for used for arbitrarily sized atomics
|
||||||
|
Impl::init_lock_array_cuda_space();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
typedef Cuda::size_type ScratchGrain[ Impl::CudaTraits::WarpSize ] ;
|
||||||
|
enum { sizeScratchGrain = sizeof(ScratchGrain) };
|
||||||
|
|
||||||
|
|
||||||
|
/** Return the scratch-flags buffer, first replacing it with a larger,
 *  zero-filled allocation when it holds fewer than 'size' bytes.
 *  Capacity is tracked in whole ScratchGrain units.
 */
Cuda::size_type *
CudaInternal::scratch_flags( const Cuda::size_type size )
{
  const bool must_grow =
    verify_is_initialized("scratch_flags") &&
    m_scratchFlagsCount * sizeScratchGrain < size ;

  if ( must_grow ) {

    // Round the request up to a whole number of grains.
    m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

    m_scratchFlagsTracker =
      CudaSpace::allocate_and_track( std::string("InternalScratchFlags") ,
                                     sizeof( ScratchGrain ) * m_scratchFlagsCount );

    m_scratchFlags =
      reinterpret_cast<size_type *>(m_scratchFlagsTracker.alloc_ptr());

    // The flags buffer is cleared after every (re)allocation.
    CUDA_SAFE_CALL( cudaMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain ) );
  }

  return m_scratchFlags ;
}
|
||||||
|
|
||||||
|
/** Return the scratch-space buffer, first replacing it with a larger
 *  allocation when it holds fewer than 'size' bytes.
 *  Capacity is tracked in whole ScratchGrain units; the buffer is not cleared.
 */
Cuda::size_type *
CudaInternal::scratch_space( const Cuda::size_type size )
{
  const bool must_grow =
    verify_is_initialized("scratch_space") &&
    m_scratchSpaceCount * sizeScratchGrain < size ;

  if ( must_grow ) {

    // Round the request up to a whole number of grains.
    m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

    m_scratchSpaceTracker =
      CudaSpace::allocate_and_track( std::string("InternalScratchSpace") ,
                                     sizeof( ScratchGrain ) * m_scratchSpaceCount );

    m_scratchSpace =
      reinterpret_cast<size_type *>(m_scratchSpaceTracker.alloc_ptr());
  }

  return m_scratchSpace ;
}
|
||||||
|
|
||||||
|
/** Return the unified scratch buffer, first replacing it with a larger
 *  CudaHostPinnedSpace allocation when it holds fewer than 'size' bytes.
 *  Nothing is allocated when m_scratchUnifiedSupported is zero, in which
 *  case the current (possibly null) pointer is returned unchanged.
 */
Cuda::size_type *
CudaInternal::scratch_unified( const Cuda::size_type size )
{
  const bool must_grow =
    verify_is_initialized("scratch_unified") &&
    m_scratchUnifiedSupported &&
    m_scratchUnifiedCount * sizeScratchGrain < size ;

  if ( must_grow ) {

    // Round the request up to a whole number of grains.
    m_scratchUnifiedCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

    m_scratchUnifiedTracker =
      CudaHostPinnedSpace::allocate_and_track( std::string("InternalScratchUnified") ,
                                               sizeof( ScratchGrain ) * m_scratchUnifiedCount );

    m_scratchUnified =
      reinterpret_cast<size_type *>( m_scratchUnifiedTracker.alloc_ptr() );
  }

  return m_scratchUnified ;
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
void CudaInternal::finalize()
|
||||||
|
{
|
||||||
|
if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) {
|
||||||
|
|
||||||
|
lock_array_cuda_space_ptr(true);
|
||||||
|
if ( m_stream ) {
|
||||||
|
for ( size_type i = 1 ; i < m_streamCount ; ++i ) {
|
||||||
|
cudaStreamDestroy( m_stream[i] );
|
||||||
|
m_stream[i] = 0 ;
|
||||||
|
}
|
||||||
|
::free( m_stream );
|
||||||
|
}
|
||||||
|
|
||||||
|
m_scratchSpaceTracker.clear();
|
||||||
|
m_scratchFlagsTracker.clear();
|
||||||
|
m_scratchUnifiedTracker.clear();
|
||||||
|
|
||||||
|
m_cudaDev = -1 ;
|
||||||
|
m_maxWarpCount = 0 ;
|
||||||
|
m_maxBlock = 0 ;
|
||||||
|
m_maxSharedWords = 0 ;
|
||||||
|
m_scratchSpaceCount = 0 ;
|
||||||
|
m_scratchFlagsCount = 0 ;
|
||||||
|
m_scratchUnifiedCount = 0 ;
|
||||||
|
m_streamCount = 0 ;
|
||||||
|
m_scratchSpace = 0 ;
|
||||||
|
m_scratchFlags = 0 ;
|
||||||
|
m_scratchUnified = 0 ;
|
||||||
|
m_stream = 0 ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Maximum warp count recorded by CudaInternal::initialize (zero before that). */
Cuda::size_type cuda_internal_maximum_warp_count()
{
  const CudaInternal & internal = CudaInternal::singleton();
  return internal.m_maxWarpCount ;
}
|
||||||
|
|
||||||
|
/** Maximum block count recorded by CudaInternal::initialize (zero before that). */
Cuda::size_type cuda_internal_maximum_grid_count()
{
  const CudaInternal & internal = CudaInternal::singleton();
  return internal.m_maxBlock ;
}
|
||||||
|
|
||||||
|
/** Shared memory per block, counted in size_type words (zero before initialization). */
Cuda::size_type cuda_internal_maximum_shared_words()
{
  const CudaInternal & internal = CudaInternal::singleton();
  return internal.m_maxSharedWords ;
}
|
||||||
|
|
||||||
|
/** Forward to CudaInternal::scratch_space on the singleton. */
Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size )
{
  CudaInternal & internal = CudaInternal::singleton();
  return internal.scratch_space( size );
}
|
||||||
|
|
||||||
|
/** Forward to CudaInternal::scratch_flags on the singleton. */
Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size )
{
  CudaInternal & internal = CudaInternal::singleton();
  return internal.scratch_flags( size );
}
|
||||||
|
|
||||||
|
/** Forward to CudaInternal::scratch_unified on the singleton. */
Cuda::size_type * cuda_internal_scratch_unified( const Cuda::size_type size )
{
  CudaInternal & internal = CudaInternal::singleton();
  return internal.scratch_unified( size );
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** Number of CUDA devices reported by the runtime. */
Cuda::size_type Cuda::detect_device_count()
{
  const Impl::CudaInternalDevices & devices = Impl::CudaInternalDevices::singleton();
  return devices.m_cudaDevCount ;
}
|
||||||
|
|
||||||
|
int Cuda::is_initialized()
|
||||||
|
{ return Impl::CudaInternal::singleton().is_initialized(); }
|
||||||
|
|
||||||
|
/** Initialize the internal singleton on the device chosen by 'config',
 *  passing 'num_instances' as the stream count.
 */
void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances )
{
  Impl::CudaInternal & internal = Impl::CudaInternal::singleton();
  internal.initialize( config.cuda_device_id , num_instances );
}
|
||||||
|
|
||||||
|
std::vector<unsigned>
|
||||||
|
Cuda::detect_device_arch()
|
||||||
|
{
|
||||||
|
const Impl::CudaInternalDevices & s = Impl::CudaInternalDevices::singleton();
|
||||||
|
|
||||||
|
std::vector<unsigned> output( s.m_cudaDevCount );
|
||||||
|
|
||||||
|
for ( int i = 0 ; i < s.m_cudaDevCount ; ++i ) {
|
||||||
|
output[i] = s.m_cudaProp[i].major * 100 + s.m_cudaProp[i].minor ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return output ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Compute capability of the selected device, encoded as
 *  major * 100 + minor; zero when no device is selected.
 */
Cuda::size_type Cuda::device_arch()
{
  const int dev_id = Impl::CudaInternal::singleton().m_cudaDev ;

  // A negative id means no device has been selected.
  if ( dev_id < 0 ) {
    return 0 ;
  }

  const cudaDeviceProp & prop =
    Impl::CudaInternalDevices::singleton().m_cudaProp[ dev_id ] ;

  const int dev_arch = prop.major * 100 + prop.minor ;

  return dev_arch ;
}
|
||||||
|
|
||||||
|
void Cuda::finalize()
|
||||||
|
{ Impl::CudaInternal::singleton().finalize(); }
|
||||||
|
|
||||||
|
/** Default instance: bound to the selected device and stream handle zero.
 *  Prints a diagnostic when no device has been initialized.
 */
Cuda::Cuda()
  : m_device( Impl::CudaInternal::singleton().m_cudaDev )
  , m_stream( 0 )
{
  const Impl::CudaInternal & internal = Impl::CudaInternal::singleton();

  // Result intentionally unused; the call is made for its diagnostic.
  internal.verify_is_initialized( "Cuda instance constructor" );
}
|
||||||
|
|
||||||
|
/** Instance bound to the selected device and to the stream-array entry
 *  chosen by 'instance_id' modulo the stream count.
 *
 *  Falls back to stream handle zero when the device is not initialized or
 *  when no stream array exists (initialized with a stream count of zero);
 *  the latter would otherwise divide by zero and index a null array.
 */
Cuda::Cuda( const int instance_id )
  : m_device( Impl::CudaInternal::singleton().m_cudaDev )
  , m_stream(
      ( Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ) &&
        0 != Impl::CudaInternal::singleton().m_stream &&
        0 != Impl::CudaInternal::singleton().m_streamCount )
        ? Impl::CudaInternal::singleton().m_stream[ instance_id % Impl::CudaInternal::singleton().m_streamCount ]
        : 0 )
{}
|
||||||
|
|
||||||
|
void Cuda::print_configuration( std::ostream & s , const bool )
|
||||||
|
{ Impl::CudaInternal::singleton().print_configuration( s ); }
|
||||||
|
|
||||||
|
/** Always returns false. */
bool Cuda::sleep()
{
  return false ;
}
|
||||||
|
|
||||||
|
/** Always returns true. */
bool Cuda::wake()
{
  return true ;
}
|
||||||
|
|
||||||
|
void Cuda::fence()
|
||||||
|
{
|
||||||
|
Kokkos::Impl::cuda_device_synchronize();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif // KOKKOS_HAVE_CUDA
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
165
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
Executable file
165
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp
Executable file
@ -0,0 +1,165 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_INTERNAL_HPP
|
||||||
|
#define KOKKOS_CUDA_INTERNAL_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos { namespace Impl {
|
||||||
|
|
||||||
|
|
||||||
|
/** Find the largest power-of-two block size, starting at 32 and capped at
 *  1024, for which the occupancy query still reports at least one active
 *  block per multiprocessor for the driver's launch kernel.
 *
 *  \param f  functor whose team shared-memory requirement is evaluated
 *            for every candidate block size.
 *  \return   256 for CUDA versions before 6.5; otherwise the block size
 *            found, or half of the first failing size.
 */
template<class DriverType>
int cuda_get_max_block_size(const typename DriverType::functor_type & f) {
#if ( CUDA_VERSION < 6050 )
  return 256;
#else
  bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );

  // The return code of the occupancy query is not inspected, so start each
  // query from a defined value instead of reading an uninitialized int
  // should the call fail without writing its result.
  int numBlocks = 0;
  if(Large) {
    int blockSize=32;
    int sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize );
    cudaOccupancyMaxActiveBlocksPerMultiprocessor(
      &numBlocks,
      cuda_parallel_launch_constant_memory<DriverType>,
      blockSize,
      sharedmem);

    while (blockSize<1024 && numBlocks>0) {
      blockSize*=2;
      sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize );

      numBlocks = 0;
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
        &numBlocks,
        cuda_parallel_launch_constant_memory<DriverType>,
        blockSize,
        sharedmem);
    }
    // The last size tried either still fits or was one doubling too far.
    if(numBlocks>0) return blockSize;
    else return blockSize/2;
  } else {
    int blockSize=32;
    int sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize );
    cudaOccupancyMaxActiveBlocksPerMultiprocessor(
      &numBlocks,
      cuda_parallel_launch_local_memory<DriverType>,
      blockSize,
      sharedmem);

    while (blockSize<1024 && numBlocks>0) {
      blockSize*=2;
      sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize );

      numBlocks = 0;
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
        &numBlocks,
        cuda_parallel_launch_local_memory<DriverType>,
        blockSize,
        sharedmem);
    }
    // The last size tried either still fits or was one doubling too far.
    if(numBlocks>0) return blockSize;
    else return blockSize/2;
  }
#endif
}
|
||||||
|
|
||||||
|
/** Among the power-of-two block sizes 32 .. 1024, pick the one that
 *  maximizes (active blocks per multiprocessor) * (block size) for the
 *  driver's launch kernel; the smallest such size wins ties.
 *
 *  \param f  functor whose team shared-memory requirement is evaluated
 *            for every candidate block size.
 *  \return   256 for CUDA versions before 6.5; otherwise the best block
 *            size, or 0 when no candidate reports a positive occupancy.
 */
template<class DriverType>
int cuda_get_opt_block_size(const typename DriverType::functor_type & f) {
#if ( CUDA_VERSION < 6050 )
  return 256;
#else
  bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) );

  int blockSize=16;
  int numBlocks = 0;
  int sharedmem;
  int maxOccupancy=0;
  int bestBlockSize=0;

  if(Large) {
    while(blockSize<1024) {
      blockSize*=2;

      //calculate the occupancy with that optBlockSize and check whether it is larger than the largest one found so far
      sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize );

      // The query's return code is not inspected; reset the result so a
      // failed query cannot reuse a stale or uninitialized value.
      numBlocks = 0;
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
              &numBlocks,
              cuda_parallel_launch_constant_memory<DriverType>,
              blockSize,
              sharedmem);
      if(maxOccupancy < numBlocks*blockSize) {
        maxOccupancy = numBlocks*blockSize;
        bestBlockSize = blockSize;
      }
    }
  } else {
    while(blockSize<1024) {
      blockSize*=2;
      sharedmem = FunctorTeamShmemSize< typename DriverType::functor_type  >::value( f , blockSize );

      // See the note in the branch above.
      numBlocks = 0;
      cudaOccupancyMaxActiveBlocksPerMultiprocessor(
              &numBlocks,
              cuda_parallel_launch_local_memory<DriverType>,
              blockSize,
              sharedmem);

      if(maxOccupancy < numBlocks*blockSize) {
        maxOccupancy = numBlocks*blockSize;
        bestBlockSize = blockSize;
      }
    }
  }
  return bestBlockSize;
#endif
}
|
||||||
|
|
||||||
|
}} // namespace Kokkos::Impl
|
||||||
|
|
||||||
|
#endif // KOKKOS_HAVE_CUDA
|
||||||
|
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */
|
||||||
|
|
||||||
1799
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
Executable file
1799
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
Executable file
File diff suppressed because it is too large
Load Diff
424
lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
Executable file
424
lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
Executable file
@ -0,0 +1,424 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_REDUCESCAN_HPP
|
||||||
|
#define KOKKOS_CUDA_REDUCESCAN_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//Shfl based reductions
|
||||||
|
/*
|
||||||
|
* Algorithmic constraints:
|
||||||
|
* (a) threads with same threadIdx.y have same value
|
||||||
|
* (b) blockDim.x == power of two
|
||||||
|
* (c) blockDim.z == 1
|
||||||
|
*/
|
||||||
|
|
||||||
|
template< class ValueType , class JoinOp>
|
||||||
|
__device__
|
||||||
|
inline void cuda_intra_warp_reduction( ValueType& result,
|
||||||
|
const JoinOp& join,
|
||||||
|
const int max_active_thread = blockDim.y) {
|
||||||
|
|
||||||
|
unsigned int shift = 1;
|
||||||
|
|
||||||
|
//Reduce over values from threads with different threadIdx.y
|
||||||
|
while(blockDim.x * shift < 32 ) {
|
||||||
|
const ValueType tmp = shfl_down(result, blockDim.x*shift,32u);
|
||||||
|
//Only join if upper thread is active (this allows non power of two for blockDim.y
|
||||||
|
if(threadIdx.y + shift < max_active_thread)
|
||||||
|
join(result , tmp);
|
||||||
|
shift*=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = shfl(result,0,32);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class ValueType , class JoinOp>
|
||||||
|
__device__
|
||||||
|
inline void cuda_inter_warp_reduction( ValueType& value,
|
||||||
|
const JoinOp& join,
|
||||||
|
const int max_active_thread = blockDim.y) {
|
||||||
|
|
||||||
|
#define STEP_WIDTH 4
|
||||||
|
__shared__ char sh_result[sizeof(ValueType)*STEP_WIDTH];
|
||||||
|
ValueType* result = (ValueType*) & sh_result;
|
||||||
|
const unsigned step = 32 / blockDim.x;
|
||||||
|
unsigned shift = STEP_WIDTH;
|
||||||
|
const int id = threadIdx.y%step==0?threadIdx.y/step:65000;
|
||||||
|
if(id < STEP_WIDTH ) {
|
||||||
|
result[id] = value;
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
while (shift<=max_active_thread/step) {
|
||||||
|
if(shift<=id && shift+STEP_WIDTH>id && threadIdx.x==0) {
|
||||||
|
join(result[id%STEP_WIDTH],value);
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
shift+=STEP_WIDTH;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
value = result[0];
|
||||||
|
for(int i = 1; (i*step<=max_active_thread) && i<STEP_WIDTH; i++)
|
||||||
|
join(value,result[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class ValueType , class JoinOp>
|
||||||
|
__device__
|
||||||
|
inline void cuda_intra_block_reduction( ValueType& value,
|
||||||
|
const JoinOp& join,
|
||||||
|
const int max_active_thread = blockDim.y) {
|
||||||
|
cuda_intra_warp_reduction(value,join,max_active_thread);
|
||||||
|
cuda_inter_warp_reduction(value,join,max_active_thread);
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class FunctorType , class JoinOp>
|
||||||
|
__device__
|
||||||
|
bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , void >::reference_type value,
|
||||||
|
const JoinOp& join,
|
||||||
|
Cuda::size_type * const m_scratch_space,
|
||||||
|
typename FunctorValueTraits< FunctorType , void >::pointer_type const result,
|
||||||
|
Cuda::size_type * const m_scratch_flags,
|
||||||
|
const int max_active_thread = blockDim.y) {
|
||||||
|
typedef typename FunctorValueTraits< FunctorType , void >::pointer_type pointer_type;
|
||||||
|
typedef typename FunctorValueTraits< FunctorType , void >::value_type value_type;
|
||||||
|
|
||||||
|
//Do the intra-block reduction with shfl operations and static shared memory
|
||||||
|
cuda_intra_block_reduction(value,join,max_active_thread);
|
||||||
|
|
||||||
|
const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
|
//One thread in the block writes block result to global scratch_memory
|
||||||
|
if(id == 0 ) {
|
||||||
|
pointer_type global = ((pointer_type) m_scratch_space) + blockIdx.x;
|
||||||
|
*global = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
//One warp of last block performs inter block reduction through loading the block values from global scratch_memory
|
||||||
|
bool last_block = false;
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
if ( id < 32 ) {
|
||||||
|
Cuda::size_type count;
|
||||||
|
|
||||||
|
//Figure out whether this is the last block
|
||||||
|
if(id == 0)
|
||||||
|
count = Kokkos::atomic_fetch_add(m_scratch_flags,1);
|
||||||
|
count = Kokkos::shfl(count,0,32);
|
||||||
|
|
||||||
|
//Last block does the inter block reduction
|
||||||
|
if( count == gridDim.x - 1) {
|
||||||
|
//set flag back to zero
|
||||||
|
if(id == 0)
|
||||||
|
*m_scratch_flags = 0;
|
||||||
|
last_block = true;
|
||||||
|
value = 0;
|
||||||
|
|
||||||
|
pointer_type const volatile global = (pointer_type) m_scratch_space ;
|
||||||
|
|
||||||
|
//Reduce all global values with splitting work over threads in one warp
|
||||||
|
const int step_size = blockDim.x*blockDim.y < 32 ? blockDim.x*blockDim.y : 32;
|
||||||
|
for(int i=id; i<gridDim.x; i+=step_size) {
|
||||||
|
value_type tmp = global[i];
|
||||||
|
join(value, tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Perform shfl reductions within the warp only join if contribution is valid (allows gridDim.x non power of two and <32)
|
||||||
|
if (blockDim.x*blockDim.y > 1) {
|
||||||
|
value_type tmp = Kokkos::shfl_down(value, 1,32);
|
||||||
|
if( id + 1 < gridDim.x )
|
||||||
|
join(value, tmp);
|
||||||
|
}
|
||||||
|
if (blockDim.x*blockDim.y > 2) {
|
||||||
|
value_type tmp = Kokkos::shfl_down(value, 2,32);
|
||||||
|
if( id + 2 < gridDim.x )
|
||||||
|
join(value, tmp);
|
||||||
|
}
|
||||||
|
if (blockDim.x*blockDim.y > 4) {
|
||||||
|
value_type tmp = Kokkos::shfl_down(value, 4,32);
|
||||||
|
if( id + 4 < gridDim.x )
|
||||||
|
join(value, tmp);
|
||||||
|
}
|
||||||
|
if (blockDim.x*blockDim.y > 8) {
|
||||||
|
value_type tmp = Kokkos::shfl_down(value, 8,32);
|
||||||
|
if( id + 8 < gridDim.x )
|
||||||
|
join(value, tmp);
|
||||||
|
}
|
||||||
|
if (blockDim.x*blockDim.y > 16) {
|
||||||
|
value_type tmp = Kokkos::shfl_down(value, 16,32);
|
||||||
|
if( id + 16 < gridDim.x )
|
||||||
|
join(value, tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//The last block has in its thread=0 the global reduction value through "value"
|
||||||
|
return last_block;
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// See section B.17 of Cuda C Programming Guide Version 3.2
|
||||||
|
// for discussion of
|
||||||
|
// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor)
|
||||||
|
// function qualifier which could be used to improve performance.
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Maximize shared memory and minimize L1 cache:
|
||||||
|
// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferShared );
|
||||||
|
// For 2.0 capability: 48 KB shared and 16 KB L1
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/*
|
||||||
|
* Algorithmic constraints:
|
||||||
|
* (a) blockDim.y is a power of two
|
||||||
|
* (b) blockDim.y <= 512
|
||||||
|
* (c) blockDim.x == blockDim.z == 1
|
||||||
|
*/
|
||||||
|
|
||||||
|
template< bool DoScan , class FunctorType , class ArgTag >
|
||||||
|
__device__
|
||||||
|
void cuda_intra_block_reduce_scan( const FunctorType & functor ,
|
||||||
|
const typename FunctorValueTraits< FunctorType , ArgTag >::pointer_type base_data )
|
||||||
|
{
|
||||||
|
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
|
||||||
|
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
|
||||||
|
const unsigned value_count = ValueTraits::value_count( functor );
|
||||||
|
const unsigned BlockSizeMask = blockDim.y - 1 ;
|
||||||
|
|
||||||
|
// Must have power of two thread count
|
||||||
|
|
||||||
|
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_intra_block_scan requires power-of-two blockDim"); }
|
||||||
|
|
||||||
|
#define BLOCK_REDUCE_STEP( R , TD , S ) \
|
||||||
|
if ( ! ( R & ((1<<(S+1))-1) ) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S)) ); }
|
||||||
|
|
||||||
|
#define BLOCK_SCAN_STEP( TD , N , S ) \
|
||||||
|
if ( N == (1<<S) ) { ValueJoin::join( functor , TD , (TD - (value_count<<S))); }
|
||||||
|
|
||||||
|
const unsigned rtid_intra = threadIdx.y ^ BlockSizeMask ;
|
||||||
|
const pointer_type tdata_intra = base_data + value_count * threadIdx.y ;
|
||||||
|
|
||||||
|
{ // Intra-warp reduction:
|
||||||
|
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,0)
|
||||||
|
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,1)
|
||||||
|
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,2)
|
||||||
|
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,3)
|
||||||
|
BLOCK_REDUCE_STEP(rtid_intra,tdata_intra,4)
|
||||||
|
}
|
||||||
|
|
||||||
|
__syncthreads(); // Wait for all warps to reduce
|
||||||
|
|
||||||
|
{ // Inter-warp reduce-scan by a single warp to avoid extra synchronizations
|
||||||
|
const unsigned rtid_inter = ( threadIdx.y ^ BlockSizeMask ) << CudaTraits::WarpIndexShift ;
|
||||||
|
|
||||||
|
if ( rtid_inter < blockDim.y ) {
|
||||||
|
|
||||||
|
const pointer_type tdata_inter = base_data + value_count * ( rtid_inter ^ BlockSizeMask );
|
||||||
|
|
||||||
|
if ( (1<<5) < BlockSizeMask ) { BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,5) }
|
||||||
|
if ( (1<<6) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,6) }
|
||||||
|
if ( (1<<7) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,7) }
|
||||||
|
if ( (1<<8) < BlockSizeMask ) { __threadfence_block(); BLOCK_REDUCE_STEP(rtid_inter,tdata_inter,8) }
|
||||||
|
|
||||||
|
if ( DoScan ) {
|
||||||
|
|
||||||
|
int n = ( rtid_inter & 32 ) ? 32 : (
|
||||||
|
( rtid_inter & 64 ) ? 64 : (
|
||||||
|
( rtid_inter & 128 ) ? 128 : (
|
||||||
|
( rtid_inter & 256 ) ? 256 : 0 )));
|
||||||
|
|
||||||
|
if ( ! ( rtid_inter + n < blockDim.y ) ) n = 0 ;
|
||||||
|
|
||||||
|
BLOCK_SCAN_STEP(tdata_inter,n,8)
|
||||||
|
BLOCK_SCAN_STEP(tdata_inter,n,7)
|
||||||
|
BLOCK_SCAN_STEP(tdata_inter,n,6)
|
||||||
|
BLOCK_SCAN_STEP(tdata_inter,n,5)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__syncthreads(); // Wait for inter-warp reduce-scan to complete
|
||||||
|
|
||||||
|
if ( DoScan ) {
|
||||||
|
int n = ( rtid_intra & 1 ) ? 1 : (
|
||||||
|
( rtid_intra & 2 ) ? 2 : (
|
||||||
|
( rtid_intra & 4 ) ? 4 : (
|
||||||
|
( rtid_intra & 8 ) ? 8 : (
|
||||||
|
( rtid_intra & 16 ) ? 16 : 0 ))));
|
||||||
|
|
||||||
|
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
|
||||||
|
|
||||||
|
BLOCK_SCAN_STEP(tdata_intra,n,4) __threadfence_block();
|
||||||
|
BLOCK_SCAN_STEP(tdata_intra,n,3) __threadfence_block();
|
||||||
|
BLOCK_SCAN_STEP(tdata_intra,n,2) __threadfence_block();
|
||||||
|
BLOCK_SCAN_STEP(tdata_intra,n,1) __threadfence_block();
|
||||||
|
BLOCK_SCAN_STEP(tdata_intra,n,0)
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef BLOCK_SCAN_STEP
|
||||||
|
#undef BLOCK_REDUCE_STEP
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/**\brief Input value-per-thread starting at 'shared_data'.
|
||||||
|
* Reduction value at last thread's location.
|
||||||
|
*
|
||||||
|
* If 'DoScan' then write blocks' scan values and block-groups' scan values.
|
||||||
|
*
|
||||||
|
* Global reduce result is in the last threads' 'shared_data' location.
|
||||||
|
*/
|
||||||
|
template< bool DoScan , class FunctorType , class ArgTag >
|
||||||
|
__device__
|
||||||
|
bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
|
||||||
|
const Cuda::size_type block_id ,
|
||||||
|
const Cuda::size_type block_count ,
|
||||||
|
Cuda::size_type * const shared_data ,
|
||||||
|
Cuda::size_type * const global_data ,
|
||||||
|
Cuda::size_type * const global_flags )
|
||||||
|
{
|
||||||
|
typedef Cuda::size_type size_type ;
|
||||||
|
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
|
||||||
|
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
|
||||||
|
typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ;
|
||||||
|
typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
const unsigned BlockSizeMask = blockDim.y - 1 ;
|
||||||
|
const unsigned BlockSizeShift = power_of_two_if_valid( blockDim.y );
|
||||||
|
|
||||||
|
// Must have power of two thread count
|
||||||
|
if ( BlockSizeMask & blockDim.y ) { Kokkos::abort("Cuda::cuda_single_inter_block_reduce_scan requires power-of-two blockDim"); }
|
||||||
|
|
||||||
|
const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
|
||||||
|
word_count( ValueTraits::value_size( functor ) / sizeof(size_type) );
|
||||||
|
|
||||||
|
// Reduce the accumulation for the entire block.
|
||||||
|
cuda_intra_block_reduce_scan<false,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
|
||||||
|
|
||||||
|
{
|
||||||
|
// Write accumulation total to global scratch space.
|
||||||
|
// Accumulation total is the last thread's data.
|
||||||
|
size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
|
||||||
|
size_type * const global = global_data + word_count.value * block_id ;
|
||||||
|
|
||||||
|
for ( size_type i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i] ; }
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contributing blocks note that their contribution has been completed via an atomic-increment flag
|
||||||
|
// If this block is not the last block to contribute to this group then the block is done.
|
||||||
|
const bool is_last_block =
|
||||||
|
! __syncthreads_or( threadIdx.y ? 0 : ( 1 + atomicInc( global_flags , block_count - 1 ) < block_count ) );
|
||||||
|
|
||||||
|
if ( is_last_block ) {
|
||||||
|
|
||||||
|
const size_type b = ( long(block_count) * long(threadIdx.y) ) >> BlockSizeShift ;
|
||||||
|
const size_type e = ( long(block_count) * long( threadIdx.y + 1 ) ) >> BlockSizeShift ;
|
||||||
|
|
||||||
|
{
|
||||||
|
void * const shared_ptr = shared_data + word_count.value * threadIdx.y ;
|
||||||
|
reference_type shared_value = ValueInit::init( functor , shared_ptr );
|
||||||
|
|
||||||
|
for ( size_type i = b ; i < e ; ++i ) {
|
||||||
|
ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cuda_intra_block_reduce_scan<DoScan,FunctorType,ArgTag>( functor , pointer_type(shared_data) );
|
||||||
|
|
||||||
|
if ( DoScan ) {
|
||||||
|
|
||||||
|
size_type * const shared_value = shared_data + word_count.value * ( threadIdx.y ? threadIdx.y - 1 : blockDim.y );
|
||||||
|
|
||||||
|
if ( ! threadIdx.y ) { ValueInit::init( functor , shared_value ); }
|
||||||
|
|
||||||
|
// Join previous inclusive scan value to each member
|
||||||
|
for ( size_type i = b ; i < e ; ++i ) {
|
||||||
|
size_type * const global_value = global_data + word_count.value * i ;
|
||||||
|
ValueJoin::join( functor , shared_value , global_value );
|
||||||
|
ValueOps ::copy( functor , global_value , shared_value );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return is_last_block ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size in bytes required for inter block reduce or scan
|
||||||
|
template< bool DoScan , class FunctorType , class ArgTag >
|
||||||
|
inline
|
||||||
|
unsigned cuda_single_inter_block_reduce_scan_shmem( const FunctorType & functor , const unsigned BlockSize )
|
||||||
|
{
|
||||||
|
return ( BlockSize + 2 ) * Impl::FunctorValueTraits< FunctorType , ArgTag >::value_size( functor );
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #if defined( __CUDACC__ ) */
|
||||||
|
#endif /* KOKKOS_CUDA_REDUCESCAN_HPP */
|
||||||
|
|
||||||
298
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
Executable file
298
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp
Executable file
@ -0,0 +1,298 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
#ifndef KOKKOS_CUDA_VECTORIZATION_HPP
|
||||||
|
#define KOKKOS_CUDA_VECTORIZATION_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <Kokkos_Cuda.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
|
||||||
|
// Shuffle only makes sense on >= Kepler GPUs; it doesn't work on CPUs
|
||||||
|
// or other GPUs. We provide a generic definition (which is trivial
|
||||||
|
// and doesn't do what it claims to do) because we don't actually use
|
||||||
|
// this function unless we are on a suitable GPU, with a suitable
|
||||||
|
// Scalar type. (For example, in the mat-vec, the "ThreadsPerRow"
|
||||||
|
// internal parameter depends both on the ExecutionSpace and the Scalar type,
|
||||||
|
// and it controls whether shfl_down() gets called.)
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< typename Scalar >
|
||||||
|
struct shfl_union {
|
||||||
|
enum {n = sizeof(Scalar)/4};
|
||||||
|
float fval[n];
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Scalar value() {
|
||||||
|
return *(Scalar*) fval;
|
||||||
|
}
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator= (Scalar& value_) {
|
||||||
|
float* const val_ptr = (float*) &value_;
|
||||||
|
for(int i=0; i<n ; i++) {
|
||||||
|
fval[i] = val_ptr[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator= (const Scalar& value_) {
|
||||||
|
float* const val_ptr = (float*) &value_;
|
||||||
|
for(int i=0; i<n ; i++) {
|
||||||
|
fval[i] = val_ptr[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
#if (__CUDA_ARCH__ >= 300)
|
||||||
|
|
||||||
|
// shfl overload set, compiled when __CUDA_ARCH__ >= 300.
// Every overload forwards 'srcLane' and 'width' unchanged to the CUDA __shfl
// intrinsic and returns what the intrinsic delivers.

// int: passed to the intrinsic directly.
KOKKOS_INLINE_FUNCTION
int shfl(const int &val, const int& srcLane, const int& width ) {
  const int received = __shfl(val,srcLane,width);
  return received;
}

// float: passed to the intrinsic directly.
KOKKOS_INLINE_FUNCTION
float shfl(const float &val, const int& srcLane, const int& width ) {
  const float received = __shfl(val,srcLane,width);
  return received;
}

// Any other 4-byte Scalar: its bits are moved as a float.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type& width
    ) {
  Scalar local = val;   // mutable copy so its storage can be viewed as float
  float  bits  = *reinterpret_cast<float*>(&local);
  bits = __shfl(bits,srcLane,width);
  return *reinterpret_cast<Scalar*>(&bits);
}

// double: moved as two 32-bit halves, low half first.
KOKKOS_INLINE_FUNCTION
double shfl(const double &val, const int& srcLane, const int& width) {
  const int lo_in  = __double2loint(val);
  const int hi_in  = __double2hiint(val);
  const int lo_out = __shfl(lo_in,srcLane,width);
  const int hi_out = __shfl(hi_in,srcLane,width);
  return __hiloint2double(hi_out,lo_out);
}

// Any other 8-byte Scalar: viewed as a double and moved as two 32-bit halves.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) {
  const int lo_in  = __double2loint(*reinterpret_cast<const double*>(&val));
  const int hi_in  = __double2hiint(*reinterpret_cast<const double*>(&val));
  const int lo_out = __shfl(lo_in,srcLane,width);
  const int hi_out = __shfl(hi_in,srcLane,width);
  const double joined = __hiloint2double(hi_out,lo_out);
  return *(reinterpret_cast<const Scalar*>(&joined));
}

// Scalars larger than 8 bytes: moved one 4-byte word at a time via shfl_union.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) > 8) ,int>::type& width) {
  Impl::shfl_union<Scalar> outgoing;
  Impl::shfl_union<Scalar> incoming;
  outgoing = val;

  for ( int word = 0 ; word < outgoing.n ; word++ ) {
    incoming.fval[word] = __shfl(outgoing.fval[word],srcLane,width);
  }
  return incoming.value();
}
|
||||||
|
|
||||||
|
// shfl_down overload set, compiled when __CUDA_ARCH__ >= 300.
// Every overload forwards 'delta' and 'width' unchanged to the CUDA
// __shfl_down intrinsic and returns what the intrinsic delivers.

// int: passed to the intrinsic directly.
KOKKOS_INLINE_FUNCTION
int shfl_down(const int &val, const int& delta, const int& width) {
  const int received = __shfl_down(val,delta,width);
  return received;
}

// float: passed to the intrinsic directly.
KOKKOS_INLINE_FUNCTION
float shfl_down(const float &val, const int& delta, const int& width) {
  const float received = __shfl_down(val,delta,width);
  return received;
}

// Any other 4-byte Scalar: its bits are moved as a float.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
  Scalar local = val;   // mutable copy so its storage can be viewed as float
  float  bits  = *reinterpret_cast<float*>(&local);
  bits = __shfl_down(bits,delta,width);
  return *reinterpret_cast<Scalar*>(&bits);
}

// double: moved as two 32-bit halves, low half first.
KOKKOS_INLINE_FUNCTION
double shfl_down(const double &val, const int& delta, const int& width) {
  const int lo_in  = __double2loint(val);
  const int hi_in  = __double2hiint(val);
  const int lo_out = __shfl_down(lo_in,delta,width);
  const int hi_out = __shfl_down(hi_in,delta,width);
  return __hiloint2double(hi_out,lo_out);
}

// Any other 8-byte Scalar: viewed as a double and moved as two 32-bit halves.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
  const int lo_in  = __double2loint(*reinterpret_cast<const double*>(&val));
  const int hi_in  = __double2hiint(*reinterpret_cast<const double*>(&val));
  const int lo_out = __shfl_down(lo_in,delta,width);
  const int hi_out = __shfl_down(hi_in,delta,width);
  const double joined = __hiloint2double(hi_out,lo_out);
  return *(reinterpret_cast<const Scalar*>(&joined));
}

// Scalars larger than 8 bytes: moved one 4-byte word at a time via shfl_union.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
  Impl::shfl_union<Scalar> outgoing;
  Impl::shfl_union<Scalar> incoming;
  outgoing = val;

  for ( int word = 0 ; word < outgoing.n ; word++ ) {
    incoming.fval[word] = __shfl_down(outgoing.fval[word],delta,width);
  }
  return incoming.value();
}
|
||||||
|
|
||||||
|
// shfl_up overload set, compiled when __CUDA_ARCH__ >= 300.
// Every overload forwards 'delta' and 'width' unchanged to the CUDA
// __shfl_up intrinsic and returns what the intrinsic delivers.

// int: passed to the intrinsic directly.
KOKKOS_INLINE_FUNCTION
int shfl_up(const int &val, const int& delta, const int& width ) {
  const int received = __shfl_up(val,delta,width);
  return received;
}

// float: passed to the intrinsic directly.
KOKKOS_INLINE_FUNCTION
float shfl_up(const float &val, const int& delta, const int& width ) {
  const float received = __shfl_up(val,delta,width);
  return received;
}

// Any other 4-byte Scalar: its bits are moved as a float.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) {
  Scalar local = val;   // mutable copy so its storage can be viewed as float
  float  bits  = *reinterpret_cast<float*>(&local);
  bits = __shfl_up(bits,delta,width);
  return *reinterpret_cast<Scalar*>(&bits);
}

// double: moved as two 32-bit halves, low half first.
KOKKOS_INLINE_FUNCTION
double shfl_up(const double &val, const int& delta, const int& width ) {
  const int lo_in  = __double2loint(val);
  const int hi_in  = __double2hiint(val);
  const int lo_out = __shfl_up(lo_in,delta,width);
  const int hi_out = __shfl_up(hi_in,delta,width);
  return __hiloint2double(hi_out,lo_out);
}

// Any other 8-byte Scalar: viewed as a double and moved as two 32-bit halves.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) {
  const int lo_in  = __double2loint(*reinterpret_cast<const double*>(&val));
  const int hi_in  = __double2hiint(*reinterpret_cast<const double*>(&val));
  const int lo_out = __shfl_up(lo_in,delta,width);
  const int hi_out = __shfl_up(hi_in,delta,width);
  const double joined = __hiloint2double(hi_out,lo_out);
  return *(reinterpret_cast<const Scalar*>(&joined));
}

// Scalars larger than 8 bytes: moved one 4-byte word at a time via shfl_union.
template<typename Scalar>
KOKKOS_INLINE_FUNCTION
Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) > 8) , int >::type & width) {
  Impl::shfl_union<Scalar> outgoing;
  Impl::shfl_union<Scalar> incoming;
  outgoing = val;

  for ( int word = 0 ; word < outgoing.n ; word++ ) {
    incoming.fval[word] = __shfl_up(outgoing.fval[word],delta,width);
  }
  return incoming.value();
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
template<typename Scalar>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Scalar shfl(const Scalar &val, const int& srcLane, const int& width) {
|
||||||
|
if(width > 1) Kokkos::abort("Error: calling shfl from a device with CC<3.0.");
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Scalar shfl_down(const Scalar &val, const int& delta, const int& width) {
|
||||||
|
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Scalar shfl_up(const Scalar &val, const int& delta, const int& width) {
|
||||||
|
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
template<typename Scalar>
|
||||||
|
inline
|
||||||
|
Scalar shfl(const Scalar &val, const int& srcLane, const int& width) {
|
||||||
|
if(width > 1) Kokkos::abort("Error: calling shfl from a device with CC<3.0.");
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
inline
|
||||||
|
Scalar shfl_down(const Scalar &val, const int& delta, const int& width) {
|
||||||
|
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
inline
|
||||||
|
Scalar shfl_up(const Scalar &val, const int& delta, const int& width) {
|
||||||
|
if(width > 1) Kokkos::abort("Error: calling shfl_down from a device with CC<3.0.");
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // KOKKOS_HAVE_CUDA
|
||||||
|
#endif
|
||||||
312
lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
Executable file
312
lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
Executable file
@ -0,0 +1,312 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_VIEW_HPP
|
||||||
|
#define KOKKOS_CUDA_VIEW_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
/* only compile this file if CUDA is enabled for Kokkos */
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include <Kokkos_HostSpace.hpp>
|
||||||
|
#include <Kokkos_CudaSpace.hpp>
|
||||||
|
#include <Kokkos_View.hpp>
|
||||||
|
|
||||||
|
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct AssertShapeBoundsAbort< CudaSpace >
|
||||||
|
{
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static void apply( const size_t /* rank */ ,
|
||||||
|
const size_t /* n0 */ , const size_t /* n1 */ ,
|
||||||
|
const size_t /* n2 */ , const size_t /* n3 */ ,
|
||||||
|
const size_t /* n4 */ , const size_t /* n5 */ ,
|
||||||
|
const size_t /* n6 */ , const size_t /* n7 */ ,
|
||||||
|
|
||||||
|
const size_t /* arg_rank */ ,
|
||||||
|
const size_t /* i0 */ , const size_t /* i1 */ ,
|
||||||
|
const size_t /* i2 */ , const size_t /* i3 */ ,
|
||||||
|
const size_t /* i4 */ , const size_t /* i5 */ ,
|
||||||
|
const size_t /* i6 */ , const size_t /* i7 */ )
|
||||||
|
{
|
||||||
|
Kokkos::abort("Kokkos::View array bounds violation");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Cuda Texture fetches can be performed for 4, 8 and 16 byte objects (int,int2,int4)
|
||||||
|
// Via reinterpret_cast this can be used to support all scalar types of those sizes.
|
||||||
|
// Any other scalar type falls back to either normal reads out of global memory,
|
||||||
|
// or using the __ldg intrinsic on Kepler GPUs or newer (Compute Capability >= 3.0)
|
||||||
|
|
||||||
|
template< typename ValueType
|
||||||
|
, class MemorySpace
|
||||||
|
, class AliasType =
|
||||||
|
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 4 ) , int ,
|
||||||
|
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 8 ) , ::int2 ,
|
||||||
|
typename Kokkos::Impl::if_c< ( sizeof(ValueType) == 16 ) , ::int4 , void
|
||||||
|
>::type
|
||||||
|
>::type
|
||||||
|
>::type
|
||||||
|
>
|
||||||
|
class CudaTextureFetch {
|
||||||
|
private:
|
||||||
|
|
||||||
|
cuda_texture_object_type m_obj ;
|
||||||
|
const ValueType * m_alloc_ptr ;
|
||||||
|
int m_offset ;
|
||||||
|
|
||||||
|
void attach( const ValueType * const arg_ptr, AllocationTracker const & tracker )
|
||||||
|
{
|
||||||
|
typedef char const * const byte;
|
||||||
|
|
||||||
|
m_alloc_ptr = reinterpret_cast<ValueType *>(tracker.alloc_ptr());
|
||||||
|
|
||||||
|
size_t byte_offset = reinterpret_cast<byte>(arg_ptr) - reinterpret_cast<byte>(m_alloc_ptr);
|
||||||
|
const bool ok_aligned = 0 == byte_offset % sizeof(ValueType);
|
||||||
|
|
||||||
|
const size_t count = tracker.alloc_size() / sizeof(ValueType);
|
||||||
|
const bool ok_contains = (m_alloc_ptr <= arg_ptr) && (arg_ptr < (m_alloc_ptr + count));
|
||||||
|
|
||||||
|
if (ok_aligned && ok_contains) {
|
||||||
|
if (tracker.attribute() == NULL ) {
|
||||||
|
MemorySpace::texture_object_attach(
|
||||||
|
tracker
|
||||||
|
, sizeof(ValueType)
|
||||||
|
, cudaCreateChannelDesc< AliasType >()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
m_obj = dynamic_cast<TextureAttribute*>(tracker.attribute())->m_tex_obj;
|
||||||
|
m_offset = arg_ptr - m_alloc_ptr;
|
||||||
|
}
|
||||||
|
else if( !ok_contains ) {
|
||||||
|
throw_runtime_exception("Error: cannot attach a texture object to a tracker which does not bound the pointer.");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw_runtime_exception("Error: cannot attach a texture object to an incorrectly aligned pointer.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch() : m_obj() , m_alloc_ptr() , m_offset() {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
~CudaTextureFetch() {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch( const CudaTextureFetch & rhs )
|
||||||
|
: m_obj( rhs.m_obj )
|
||||||
|
, m_alloc_ptr( rhs.m_alloc_ptr )
|
||||||
|
, m_offset( rhs.m_offset )
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs )
|
||||||
|
{
|
||||||
|
m_obj = rhs.m_obj ;
|
||||||
|
m_alloc_ptr = rhs.m_alloc_ptr ;
|
||||||
|
m_offset = rhs.m_offset ;
|
||||||
|
return *this ;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION explicit
|
||||||
|
CudaTextureFetch( const ValueType * const arg_ptr, AllocationTracker const & tracker )
|
||||||
|
: m_obj( 0 ) , m_alloc_ptr(0) , m_offset(0)
|
||||||
|
{
|
||||||
|
#if defined( KOKKOS_USE_LDG_INTRINSIC )
|
||||||
|
m_alloc_ptr(arg_ptr);
|
||||||
|
#elif defined( __CUDACC__ ) && ! defined( __CUDA_ARCH__ )
|
||||||
|
if ( arg_ptr != NULL ) {
|
||||||
|
if ( tracker.is_valid() ) {
|
||||||
|
attach( arg_ptr, tracker );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
AllocationTracker found_tracker = AllocationTracker::find<typename MemorySpace::allocator>(arg_ptr);
|
||||||
|
if ( found_tracker.is_valid() ) {
|
||||||
|
attach( arg_ptr, found_tracker );
|
||||||
|
} else {
|
||||||
|
throw_runtime_exception("Error: cannot attach a texture object to an untracked pointer!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
operator const ValueType * () const { return m_alloc_ptr + m_offset ; }
|
||||||
|
|
||||||
|
|
||||||
|
template< typename iType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
ValueType operator[]( const iType & i ) const
|
||||||
|
{
|
||||||
|
#if defined( KOKKOS_USE_LDG_INTRINSIC ) && defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
|
||||||
|
AliasType v = __ldg(reinterpret_cast<AliasType*>(&m_alloc_ptr[i]));
|
||||||
|
return *(reinterpret_cast<ValueType*> (&v));
|
||||||
|
#elif defined( __CUDA_ARCH__ ) && ( 300 <= __CUDA_ARCH__ )
|
||||||
|
AliasType v = tex1Dfetch<AliasType>( m_obj , i + m_offset );
|
||||||
|
return *(reinterpret_cast<ValueType*> (&v));
|
||||||
|
#else
|
||||||
|
return m_alloc_ptr[ i + m_offset ];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template< typename ValueType, class MemorySpace >
|
||||||
|
class CudaTextureFetch< const ValueType, MemorySpace, void >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
const ValueType * m_ptr ;
|
||||||
|
public:
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch() : m_ptr(0) {};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
~CudaTextureFetch() {
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch( const ValueType * ptr, const AllocationTracker & ) : m_ptr(ptr) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch( const CudaTextureFetch & rhs ) : m_ptr(rhs.m_ptr) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch & operator = ( const CudaTextureFetch & rhs ) {
|
||||||
|
m_ptr = rhs.m_ptr;
|
||||||
|
return *this ;
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch( ValueType * const base_view_ptr, AllocationTracker const & /*tracker*/ ) {
|
||||||
|
m_ptr = base_view_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
CudaTextureFetch & operator = (const ValueType* base_view_ptr) {
|
||||||
|
m_ptr = base_view_ptr;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
operator const ValueType * () const { return m_ptr ; }
|
||||||
|
|
||||||
|
|
||||||
|
template< typename iType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
ValueType operator[]( const iType & i ) const
|
||||||
|
{
|
||||||
|
return m_ptr[ i ];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/** \brief Replace Default ViewDataHandle with Cuda texture fetch specialization
|
||||||
|
* if 'const' value type, CudaSpace and random access.
|
||||||
|
*/
|
||||||
|
template< class ViewTraits >
|
||||||
|
class ViewDataHandle< ViewTraits ,
|
||||||
|
typename enable_if< ( is_same< typename ViewTraits::memory_space,CudaSpace>::value ||
|
||||||
|
is_same< typename ViewTraits::memory_space,CudaUVMSpace>::value )
|
||||||
|
&&
|
||||||
|
is_same<typename ViewTraits::const_value_type,typename ViewTraits::value_type>::value
|
||||||
|
&&
|
||||||
|
ViewTraits::memory_traits::RandomAccess
|
||||||
|
>::type >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum { ReturnTypeIsReference = false };
|
||||||
|
|
||||||
|
typedef Impl::CudaTextureFetch< typename ViewTraits::value_type
|
||||||
|
, typename ViewTraits::memory_space> handle_type;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
static handle_type create_handle( typename ViewTraits::value_type * arg_data_ptr, AllocationTracker const & arg_tracker )
|
||||||
|
{
|
||||||
|
return handle_type(arg_data_ptr, arg_tracker);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef typename ViewTraits::value_type return_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif // KOKKOS_HAVE_CUDA
|
||||||
|
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */
|
||||||
|
|
||||||
119
lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
Executable file
119
lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp
Executable file
@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_ABORT_HPP
|
||||||
|
#define KOKKOS_CUDA_ABORT_HPP
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
#include "Kokkos_Macros.hpp"
|
||||||
|
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
#include <cuda.h>
|
||||||
|
|
||||||
|
#if ! defined( CUDA_VERSION ) || ( CUDA_VERSION < 4010 )
|
||||||
|
#error "Cuda version 4.1 or greater required"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ( __CUDA_ARCH__ < 200 )
|
||||||
|
#error "Cuda device capability 2.0 or greater required"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
/* Cuda runtime function, declared in <crt/device_runtime.h>
|
||||||
|
* Requires capability 2.x or better.
|
||||||
|
*/
|
||||||
|
extern __device__ void __assertfail(
|
||||||
|
const void *message,
|
||||||
|
const void *file,
|
||||||
|
unsigned int line,
|
||||||
|
const void *function,
|
||||||
|
size_t charsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/** \brief  Abort from device code by calling the CUDA runtime's __assertfail.
 *
 *  \param message  Text passed as the assertion message.
 *
 *  The file and function arguments are an empty string and the line is 0.
 *  When __APPLE__ is defined the body is compiled out and the call does nothing.
 */
__device__ inline
void cuda_abort( const char * const message )
{
#ifndef __APPLE__
  // Stand-in for both the file name and the function name.
  const char no_text[] = "" ;

  __assertfail( static_cast<const void *>( message )
              , static_cast<const void *>( no_text )
              , 0u
              , static_cast<const void *>( no_text )
              , sizeof(char) );
#endif
}
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
/** \brief  Stub used when the device implementation above is not compiled:
 *          the message is ignored and nothing happens.
 */
KOKKOS_INLINE_FUNCTION
void cuda_abort( const char * const /* message */ )
{
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
|
||||||
|
namespace Kokkos {
|
||||||
|
/** \brief  Device-side Kokkos::abort; hands the message to Kokkos::Impl::cuda_abort. */
__device__ inline
void abort( const char * const message )
{
  Kokkos::Impl::cuda_abort( message );
}
|
||||||
|
}
|
||||||
|
#endif /* defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */
|
||||||
|
|
||||||
1945
lib/kokkos/core/src/KokkosExp_View.hpp
Executable file
1945
lib/kokkos/core/src/KokkosExp_View.hpp
Executable file
File diff suppressed because it is too large
Load Diff
285
lib/kokkos/core/src/Kokkos_Atomic.hpp
Executable file
285
lib/kokkos/core/src/Kokkos_Atomic.hpp
Executable file
@ -0,0 +1,285 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// \file Kokkos_Atomic.hpp
|
||||||
|
/// \brief Atomic functions
|
||||||
|
///
|
||||||
|
/// This header file defines prototypes for the following atomic functions:
|
||||||
|
/// - exchange
|
||||||
|
/// - compare and exchange
|
||||||
|
/// - add
|
||||||
|
///
|
||||||
|
/// Supported types include:
|
||||||
|
/// - signed and unsigned 4 and 8 byte integers
|
||||||
|
/// - float
|
||||||
|
/// - double
|
||||||
|
///
|
||||||
|
/// They are implemented through GCC compatible intrinsics, OpenMP
|
||||||
|
/// directives and native CUDA intrinsics.
|
||||||
|
///
|
||||||
|
/// Including this header file requires one of the following
|
||||||
|
/// compilers:
|
||||||
|
/// - NVCC (for CUDA device code only)
|
||||||
|
/// - GCC (for host code only)
|
||||||
|
/// - Intel (for host code only)
|
||||||
|
/// - A compiler that supports OpenMP 3.1 (for host code only)
|
||||||
|
|
||||||
|
#ifndef KOKKOS_ATOMIC_HPP
|
||||||
|
#define KOKKOS_ATOMIC_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
#include <Kokkos_HostSpace.hpp>
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#define KOKKOS_ATOMICS_USE_WINDOWS
|
||||||
|
#else
|
||||||
|
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
// Compiling NVIDIA device code, must use Cuda atomics:
|
||||||
|
|
||||||
|
#define KOKKOS_ATOMICS_USE_CUDA
|
||||||
|
|
||||||
|
#elif ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
|
||||||
|
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
|
||||||
|
! defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||||
|
|
||||||
|
// Compiling for non-Cuda atomic implementation has not been pre-selected.
|
||||||
|
// Choose the best implementation for the detected compiler.
|
||||||
|
// Preference: GCC, INTEL, OMP31
|
||||||
|
|
||||||
|
#if defined( KOKKOS_COMPILER_GNU ) || \
|
||||||
|
defined( KOKKOS_COMPILER_CLANG ) || \
|
||||||
|
( defined ( KOKKOS_COMPILER_NVCC ) && defined ( __GNUC__ ) )
|
||||||
|
|
||||||
|
#define KOKKOS_ATOMICS_USE_GCC
|
||||||
|
|
||||||
|
#elif defined( KOKKOS_COMPILER_INTEL ) || \
|
||||||
|
defined( KOKKOS_COMPILER_CRAYC )
|
||||||
|
|
||||||
|
#define KOKKOS_ATOMICS_USE_INTEL
|
||||||
|
|
||||||
|
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
|
||||||
|
|
||||||
|
#define KOKKOS_ATOMICS_USE_OMP31
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#error "KOKKOS_ATOMICS_USE : Unsupported compiler"
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* Not pre-selected atomic implementation */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Forward declaration of functions supporting arbitrary sized atomics
|
||||||
|
// This is necessary since Kokkos_Atomic.hpp is internally included very early
|
||||||
|
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
/// \brief Acquire a lock for the address
|
||||||
|
///
|
||||||
|
/// This function tries to acquire the lock for the hash value derived
|
||||||
|
/// from the provided ptr. If the lock is successfully acquired the
|
||||||
|
/// function returns true. Otherwise it returns false.
|
||||||
|
__device__ inline
|
||||||
|
bool lock_address_cuda_space(void* ptr);
|
||||||
|
|
||||||
|
/// \brief Release lock for the address
|
||||||
|
///
|
||||||
|
/// This function releases the lock for the hash value derived
|
||||||
|
/// from the provided ptr. This function should only be called
|
||||||
|
/// after previously successfully acquiring a lock with
|
||||||
|
/// lock_address_cuda_space.
|
||||||
|
__device__ inline
|
||||||
|
void unlock_address_cuda_space(void* ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
template <typename T>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void atomic_add(volatile T * const dest, const T src);
|
||||||
|
|
||||||
|
// Atomic increment
|
||||||
|
template<typename T>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void atomic_increment(volatile T* a);
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void atomic_decrement(volatile T* a);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if ! defined(_WIN32)
|
||||||
|
#include<impl/Kokkos_Atomic_Assembly_X86.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
|
||||||
|
/** \brief  Name of the atomic implementation selected at compile time.
 *
 *  \return The name of the first KOKKOS_ATOMICS_USE_* macro that is defined,
 *          tested in the order CUDA, GCC, INTEL, OMP31, WINDOWS, as a string
 *          literal.  If none of them is defined the literal
 *          "KOKKOS_ATOMICS_USE_UNKNOWN" is returned.
 */
inline
const char * atomic_query_version()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
  return "KOKKOS_ATOMICS_USE_CUDA" ;
#elif defined( KOKKOS_ATOMICS_USE_GCC )
  return "KOKKOS_ATOMICS_USE_GCC" ;
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
  return "KOKKOS_ATOMICS_USE_INTEL" ;
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
  return "KOKKOS_ATOMICS_USE_OMP31" ;
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
  return "KOKKOS_ATOMICS_USE_WINDOWS";
#else
  // Without this branch control would flow off the end of a non-void
  // function (undefined behavior) whenever no selection macro is defined.
  return "KOKKOS_ATOMICS_USE_UNKNOWN" ;
#endif
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include "impl/Kokkos_Atomic_Windows.hpp"
|
||||||
|
#else
|
||||||
|
//#include "impl/Kokkos_Atomic_Assembly_X86.hpp"
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Atomic exchange
|
||||||
|
//
|
||||||
|
// template< typename T >
|
||||||
|
// T atomic_exchange( volatile T* const dest , const T val )
|
||||||
|
// { T tmp = *dest ; *dest = val ; return tmp ; }
|
||||||
|
|
||||||
|
#include "impl/Kokkos_Atomic_Exchange.hpp"
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Atomic compare-and-exchange
|
||||||
|
//
|
||||||
|
// template<class T>
|
||||||
|
// bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val)
|
||||||
|
// { bool equal = compare == *dest ; if ( equal ) { *dest = val ; } return equal ; }
|
||||||
|
|
||||||
|
#include "impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp"
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Atomic fetch and add
|
||||||
|
//
|
||||||
|
// template<class T>
|
||||||
|
// T atomic_fetch_add(volatile T* const dest, const T val)
|
||||||
|
// { T tmp = *dest ; *dest += val ; return tmp ; }
|
||||||
|
|
||||||
|
#include "impl/Kokkos_Atomic_Fetch_Add.hpp"
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Atomic fetch and sub
|
||||||
|
//
|
||||||
|
// template<class T>
|
||||||
|
// T atomic_fetch_sub(volatile T* const dest, const T val)
|
||||||
|
// { T tmp = *dest ; *dest -= val ; return tmp ; }
|
||||||
|
|
||||||
|
#include "impl/Kokkos_Atomic_Fetch_Sub.hpp"
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Atomic fetch and or
|
||||||
|
//
|
||||||
|
// template<class T>
|
||||||
|
// T atomic_fetch_or(volatile T* const dest, const T val)
|
||||||
|
// { T tmp = *dest ; *dest = tmp | val ; return tmp ; }
|
||||||
|
|
||||||
|
#include "impl/Kokkos_Atomic_Fetch_Or.hpp"
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Atomic fetch and and
|
||||||
|
//
|
||||||
|
// template<class T>
|
||||||
|
// T atomic_fetch_and(volatile T* const dest, const T val)
|
||||||
|
// { T tmp = *dest ; *dest = tmp & val ; return tmp ; }
|
||||||
|
|
||||||
|
#include "impl/Kokkos_Atomic_Fetch_And.hpp"
|
||||||
|
#endif /*Not _WIN32*/
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Memory fence
|
||||||
|
//
|
||||||
|
// All loads and stores from this thread will be globally consistent before continuing
|
||||||
|
//
|
||||||
|
// void memory_fence() {...};
|
||||||
|
#include "impl/Kokkos_Memory_Fence.hpp"
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Provide volatile_load and safe_load
|
||||||
|
//
|
||||||
|
// T volatile_load(T const volatile * const ptr);
|
||||||
|
//
|
||||||
|
// T const& safe_load(T const * const ptr);
|
||||||
|
// XEON PHI
|
||||||
|
// T safe_load(T const * const ptr);
|
||||||
|
|
||||||
|
#include "impl/Kokkos_Volatile_Load.hpp"
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include "impl/Kokkos_Atomic_Generic.hpp"
|
||||||
|
#endif
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// This atomic-style macro should be an inlined function, not a macro
|
||||||
|
|
||||||
|
#if defined( KOKKOS_COMPILER_GNU ) && !defined(__PGIC__)
|
||||||
|
|
||||||
|
#define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) __builtin_prefetch(addr,0,0)
|
||||||
|
#define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) __builtin_prefetch(addr,1,0)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define KOKKOS_NONTEMPORAL_PREFETCH_LOAD(addr) ((void)0)
|
||||||
|
#define KOKKOS_NONTEMPORAL_PREFETCH_STORE(addr) ((void)0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* KOKKOS_ATOMIC_HPP */
|
||||||
|
|
||||||
228
lib/kokkos/core/src/Kokkos_Core.hpp
Executable file
228
lib/kokkos/core/src/Kokkos_Core.hpp
Executable file
@ -0,0 +1,228 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CORE_HPP
|
||||||
|
#define KOKKOS_CORE_HPP
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Include the execution space header files for the enabled execution spaces.
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
#include <Kokkos_Cuda.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_OPENMP )
|
||||||
|
#include <Kokkos_OpenMP.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_SERIAL )
|
||||||
|
#include <Kokkos_Serial.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||||
|
#include <Kokkos_Threads.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <Kokkos_Pair.hpp>
|
||||||
|
#include <Kokkos_View.hpp>
|
||||||
|
#include <Kokkos_Vectorization.hpp>
|
||||||
|
#include <Kokkos_Atomic.hpp>
|
||||||
|
#include <Kokkos_hwloc.hpp>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief  Settings bundle accepted by Kokkos::initialize.
 *
 *  Every field starts at -1.
 *  NOTE(review): -1 presumably means "not specified, let Kokkos choose";
 *  confirm against the implementation of Kokkos::initialize.
 */
struct InitArguments {
  int num_threads;
  int num_numa;
  int device_id;

  // Members are set in the initializer list rather than assigned in the body.
  InitArguments()
    : num_threads( -1 )
    , num_numa( -1 )
    , device_id( -1 )
  {}
};
|
||||||
|
|
||||||
|
void initialize(int& narg, char* arg[]);
|
||||||
|
|
||||||
|
void initialize(const InitArguments& args = InitArguments());
|
||||||
|
|
||||||
|
/** \brief Finalize the spaces that were initialized via Kokkos::initialize */
|
||||||
|
void finalize();
|
||||||
|
|
||||||
|
/** \brief Finalize all known execution spaces */
|
||||||
|
void finalize_all();
|
||||||
|
|
||||||
|
void fence();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_CXX11
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
// should only be used by kokkos_malloc and kokkos_free
|
||||||
|
struct MallocHelper
|
||||||
|
{
|
||||||
|
static void increment_ref_count( AllocationTracker const & tracker )
|
||||||
|
{
|
||||||
|
tracker.increment_ref_count();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void decrement_ref_count( AllocationTracker const & tracker )
|
||||||
|
{
|
||||||
|
tracker.decrement_ref_count();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
/* Allocate memory from a memory space.
|
||||||
|
* The allocation is tracked in Kokkos memory tracking system, so
|
||||||
|
* leaked memory can be identified.
|
||||||
|
*/
|
||||||
|
template< class Arg = DefaultExecutionSpace>
|
||||||
|
void* kokkos_malloc(const std::string label, size_t count) {
|
||||||
|
typedef typename Arg::memory_space MemorySpace;
|
||||||
|
Impl::AllocationTracker tracker = MemorySpace::allocate_and_track(label,count);;
|
||||||
|
Impl::MallocHelper::increment_ref_count( tracker );
|
||||||
|
return tracker.alloc_ptr();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate memory from the memory space of Arg without a caller-chosen
 * label: forwards to the labelled overload using "DefaultLabel".
 */
template< class Arg = DefaultExecutionSpace>
void* kokkos_malloc(const size_t& count) {
  void* const allocation = kokkos_malloc<Arg>("DefaultLabel",count);
  return allocation;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Free memory from a memory space.
|
||||||
|
*/
|
||||||
|
template< class Arg = DefaultExecutionSpace>
|
||||||
|
void kokkos_free(const void* ptr) {
|
||||||
|
typedef typename Arg::memory_space MemorySpace;
|
||||||
|
typedef typename MemorySpace::allocator allocator;
|
||||||
|
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(ptr);
|
||||||
|
if (tracker.is_valid()) {
|
||||||
|
Impl::MallocHelper::decrement_ref_count( tracker );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template< class Arg = DefaultExecutionSpace>
|
||||||
|
const void* kokkos_realloc(const void* old_ptr, size_t size) {
|
||||||
|
typedef typename Arg::memory_space MemorySpace;
|
||||||
|
typedef typename MemorySpace::allocator allocator;
|
||||||
|
Impl::AllocationTracker tracker = Impl::AllocationTracker::find<allocator>(old_ptr);
|
||||||
|
|
||||||
|
tracker.reallocate(size);
|
||||||
|
|
||||||
|
return tracker.alloc_ptr();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
|
||||||
|
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
|
||||||
|
inline
|
||||||
|
void * kokkos_malloc( const size_t arg_alloc_size )
|
||||||
|
{
|
||||||
|
typedef typename Space::memory_space MemorySpace ;
|
||||||
|
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
|
||||||
|
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
|
||||||
|
|
||||||
|
RecordHost * const r = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
|
||||||
|
|
||||||
|
RecordBase::increment( r );
|
||||||
|
|
||||||
|
return r->data();
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
|
||||||
|
inline
|
||||||
|
void kokkos_free( void * arg_alloc )
|
||||||
|
{
|
||||||
|
typedef typename Space::memory_space MemorySpace ;
|
||||||
|
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
|
||||||
|
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
|
||||||
|
|
||||||
|
RecordHost * const r = RecordHost::get_record( arg_alloc );
|
||||||
|
|
||||||
|
RecordBase::decrement( r );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class Space = typename Kokkos::DefaultExecutionSpace::memory_space >
|
||||||
|
inline
|
||||||
|
void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size )
|
||||||
|
{
|
||||||
|
typedef typename Space::memory_space MemorySpace ;
|
||||||
|
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< void , void > RecordBase ;
|
||||||
|
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< MemorySpace , void > RecordHost ;
|
||||||
|
|
||||||
|
RecordHost * const r_old = RecordHost::get_record( arg_alloc );
|
||||||
|
RecordHost * const r_new = RecordHost::allocate( MemorySpace() , "kokkos_malloc" , arg_alloc_size );
|
||||||
|
|
||||||
|
Kokkos::Impl::DeepCopy<MemorySpace,MemorySpace>( r_new->data() , r_old->data()
|
||||||
|
, std::min( r_old->size() , r_new->size() ) );
|
||||||
|
|
||||||
|
RecordBase::increment( r_new );
|
||||||
|
RecordBase::decrement( r_old );
|
||||||
|
|
||||||
|
return r_new->data();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Experimental
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
170
lib/kokkos/core/src/Kokkos_Core_fwd.hpp
Executable file
170
lib/kokkos/core/src/Kokkos_Core_fwd.hpp
Executable file
@ -0,0 +1,170 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CORE_FWD_HPP
|
||||||
|
#define KOKKOS_CORE_FWD_HPP
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Kokkos_Macros.hpp does introspection on configuration options
|
||||||
|
// and compiler environment then sets a collection of #define macros.
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Forward declarations for class inter-relationships
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
class HostSpace ; ///< Memory space for main process and CPU execution spaces
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_SERIAL )
|
||||||
|
class Serial ; ///< Execution space main process on CPU
|
||||||
|
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||||
|
class Threads ; ///< Execution space with pthreads back-end
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_OPENMP )
|
||||||
|
class OpenMP ; ///< OpenMP execution space
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
class CudaSpace ; ///< Memory space on Cuda GPU
|
||||||
|
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
|
||||||
|
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
|
||||||
|
class Cuda ; ///< Execution space for Cuda GPU
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<class ExecutionSpace, class MemorySpace>
|
||||||
|
struct Device;
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Set the default execution space.
|
||||||
|
|
||||||
|
/// Define Kokkos::DefaultExecutionSpace as per configuration option
|
||||||
|
/// or chosen from the enabled execution spaces in the following order:
|
||||||
|
/// Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Serial
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||||
|
typedef Cuda DefaultExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||||
|
typedef OpenMP DefaultExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||||
|
typedef Threads DefaultExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||||
|
typedef Serial DefaultExecutionSpace ;
|
||||||
|
#else
|
||||||
|
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||||
|
typedef OpenMP DefaultHostExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||||
|
typedef Threads DefaultHostExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||||
|
typedef Serial DefaultHostExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_OPENMP )
|
||||||
|
typedef OpenMP DefaultHostExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_PTHREAD )
|
||||||
|
typedef Threads DefaultHostExecutionSpace ;
|
||||||
|
#elif defined ( KOKKOS_HAVE_SERIAL )
|
||||||
|
typedef Serial DefaultHostExecutionSpace ;
|
||||||
|
#else
|
||||||
|
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Detect the active execution space and define its memory space.
|
||||||
|
// This is used to verify whether a running kernel can access
|
||||||
|
// a given memory space.
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
|
||||||
|
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
|
||||||
|
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
|
||||||
|
#else
|
||||||
|
typedef void ActiveExecutionMemorySpace ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// \brief Compile-time answer to "may code running in ActiveSpace touch
///        memory in MemorySpace?".
///
/// This primary template is the fallback for unrelated spaces and
/// reports value == 0.  It intentionally declares no verify() members.
template< class ActiveSpace , class MemorySpace >
struct VerifyExecutionCanAccessMemorySpace
{
  enum { value = 0 };
};
|
||||||
|
|
||||||
|
template< class Space >
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace< Space , Space >
|
||||||
|
{
|
||||||
|
enum {value = 1};
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify(void) {}
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify(const void *) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \
|
||||||
|
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
|
||||||
|
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR )
|
||||||
|
|
||||||
|
#define KOKKOS_RESTRICT_EXECUTION_TO_( DATA_SPACE ) \
|
||||||
|
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \
|
||||||
|
Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify()
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
void fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_CORE_FWD_HPP */
|
||||||
|
|
||||||
268
lib/kokkos/core/src/Kokkos_Cuda.hpp
Executable file
268
lib/kokkos/core/src/Kokkos_Cuda.hpp
Executable file
@ -0,0 +1,268 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDA_HPP
|
||||||
|
#define KOKKOS_CUDA_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
|
||||||
|
// If CUDA execution space is enabled then use this header file.
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <Kokkos_CudaSpace.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <Kokkos_Layout.hpp>
|
||||||
|
#include <Kokkos_ScratchSpace.hpp>
|
||||||
|
#include <Kokkos_MemoryTraits.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
class CudaExec ;
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/// \class Cuda
|
||||||
|
/// \brief Kokkos Execution Space that uses CUDA to run on GPUs.
|
||||||
|
///
|
||||||
|
/// An "execution space" represents a parallel execution model. It tells Kokkos
|
||||||
|
/// how to parallelize the execution of kernels in a parallel_for or
|
||||||
|
/// parallel_reduce. For example, the Threads execution space uses Pthreads or
|
||||||
|
/// C++11 threads on a CPU, the OpenMP execution space uses the OpenMP language
|
||||||
|
/// extensions, and the Serial execution space executes "parallel" kernels
|
||||||
|
/// sequentially. The Cuda execution space uses NVIDIA's CUDA programming
|
||||||
|
/// model to execute kernels in parallel on GPUs.
|
||||||
|
class Cuda {
|
||||||
|
public:
|
||||||
|
//! \name Type declarations that all Kokkos execution spaces must provide.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos execution space
|
||||||
|
typedef Cuda execution_space ;
|
||||||
|
|
||||||
|
#if defined( KOKKOS_USE_CUDA_UVM )
|
||||||
|
//! This execution space's preferred memory space.
|
||||||
|
typedef CudaUVMSpace memory_space ;
|
||||||
|
#else
|
||||||
|
//! This execution space's preferred memory space.
|
||||||
|
typedef CudaSpace memory_space ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! This execution space preferred device_type
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
//! The size_type best suited for this execution space.
|
||||||
|
typedef memory_space::size_type size_type ;
|
||||||
|
|
||||||
|
//! This execution space's preferred array layout.
|
||||||
|
typedef LayoutLeft array_layout ;
|
||||||
|
|
||||||
|
//!
|
||||||
|
typedef ScratchMemorySpace< Cuda > scratch_memory_space ;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//--------------------------------------------------
|
||||||
|
//! \name Functions that all Kokkos devices must implement.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
/// \brief True if and only if this method is being called in a
|
||||||
|
/// thread-parallel function.
|
||||||
|
KOKKOS_INLINE_FUNCTION static int in_parallel() {
|
||||||
|
#if defined( __CUDA_ARCH__ )
|
||||||
|
return true;
|
||||||
|
#else
|
||||||
|
return false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Set the device in a "sleep" state.
|
||||||
|
*
|
||||||
|
* This function sets the device in a "sleep" state in which it is
|
||||||
|
* not ready for work. This may consume less resources than if the
|
||||||
|
* device were in an "awake" state, but it may also take time to
|
||||||
|
* bring the device from a sleep state to be ready for work.
|
||||||
|
*
|
||||||
|
* \return True if the device is in the "sleep" state, else false if
|
||||||
|
* the device is actively working and could not enter the "sleep"
|
||||||
|
* state.
|
||||||
|
*/
|
||||||
|
static bool sleep();
|
||||||
|
|
||||||
|
/// \brief Wake the device from the 'sleep' state so it is ready for work.
|
||||||
|
///
|
||||||
|
/// \return True if the device is in the "ready" state, else "false"
|
||||||
|
/// if the device is actively working (which also means that it's
|
||||||
|
/// awake).
|
||||||
|
static bool wake();
|
||||||
|
|
||||||
|
/// \brief Wait until all dispatched functors complete.
|
||||||
|
///
|
||||||
|
/// The parallel_for or parallel_reduce dispatch of a functor may
|
||||||
|
/// return asynchronously, before the functor completes. This
|
||||||
|
/// method does not return until all dispatched functors on this
|
||||||
|
/// device have completed.
|
||||||
|
static void fence();
|
||||||
|
|
||||||
|
//! Free any resources being consumed by the device.
|
||||||
|
static void finalize();
|
||||||
|
|
||||||
|
//! Has been initialized
|
||||||
|
static int is_initialized();
|
||||||
|
|
||||||
|
//! Print configuration information to the given output stream.
|
||||||
|
static void print_configuration( std::ostream & , const bool detail = false );
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//--------------------------------------------------
|
||||||
|
//! \name Cuda space instances
|
||||||
|
|
||||||
|
~Cuda() {}
|
||||||
|
Cuda();
|
||||||
|
explicit Cuda( const int instance_id );
|
||||||
|
|
||||||
|
Cuda( const Cuda & ) = default ;
|
||||||
|
Cuda( Cuda && ) = default ;
|
||||||
|
Cuda & operator = ( const Cuda & ) = default ;
|
||||||
|
Cuda & operator = ( Cuda && ) = default ;
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
//! \name Device-specific functions
|
||||||
|
//@{
|
||||||
|
|
||||||
|
struct SelectDevice {
|
||||||
|
int cuda_device_id ;
|
||||||
|
SelectDevice() : cuda_device_id(0) {}
|
||||||
|
explicit SelectDevice( int id ) : cuda_device_id( id ) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
//! Initialize, telling the CUDA run-time library which device to use.
|
||||||
|
static void initialize( const SelectDevice = SelectDevice()
|
||||||
|
, const size_t num_instances = 1 );
|
||||||
|
|
||||||
|
/// \brief Cuda device architecture of the selected device.
|
||||||
|
///
|
||||||
|
/// This matches the __CUDA_ARCH__ specification.
|
||||||
|
static size_type device_arch();
|
||||||
|
|
||||||
|
//! Query device count.
|
||||||
|
static size_type detect_device_count();
|
||||||
|
|
||||||
|
/** \brief Detect the available devices and their architecture
|
||||||
|
* as defined by the __CUDA_ARCH__ specification.
|
||||||
|
*/
|
||||||
|
static std::vector<unsigned> detect_device_arch();
|
||||||
|
|
||||||
|
cudaStream_t cuda_stream() const { return m_stream ; }
|
||||||
|
int cuda_device() const { return m_device ; }
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
cudaStream_t m_stream ;
|
||||||
|
int m_device ;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace
|
||||||
|
< Kokkos::CudaSpace
|
||||||
|
, Kokkos::Cuda::scratch_memory_space
|
||||||
|
>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace
|
||||||
|
< Kokkos::HostSpace
|
||||||
|
, Kokkos::Cuda::scratch_memory_space
|
||||||
|
>
|
||||||
|
{
|
||||||
|
enum { value = false };
|
||||||
|
inline static void verify( void ) { CudaSpace::access_error(); }
|
||||||
|
inline static void verify( const void * p ) { CudaSpace::access_error(p); }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#include <Cuda/Kokkos_CudaExec.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_View.hpp>
|
||||||
|
|
||||||
|
#include <KokkosExp_View.hpp>
|
||||||
|
#include <Cuda/KokkosExp_Cuda_View.hpp>
|
||||||
|
|
||||||
|
#include <Cuda/Kokkos_Cuda_Parallel.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||||
|
#endif /* #ifndef KOKKOS_CUDA_HPP */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
656
lib/kokkos/core/src/Kokkos_CudaSpace.hpp
Executable file
656
lib/kokkos/core/src/Kokkos_CudaSpace.hpp
Executable file
@ -0,0 +1,656 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_CUDASPACE_HPP
|
||||||
|
#define KOKKOS_CUDASPACE_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <typeinfo>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <Kokkos_HostSpace.hpp>
|
||||||
|
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp>
|
||||||
|
|
||||||
|
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||||
|
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Cuda on-device memory management */
|
||||||
|
|
||||||
|
class CudaSpace {
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos memory space
|
||||||
|
typedef CudaSpace memory_space ;
|
||||||
|
typedef Kokkos::Cuda execution_space ;
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
typedef unsigned int size_type ;
|
||||||
|
|
||||||
|
typedef Impl::CudaMallocAllocator allocator;
|
||||||
|
|
||||||
|
/** \brief Allocate a contiguous block of memory.
|
||||||
|
*
|
||||||
|
* The input label is associated with the block of memory.
|
||||||
|
* The block of memory is tracked via reference counting where
|
||||||
|
* allocation gives it a reference count of one.
|
||||||
|
*/
|
||||||
|
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
/** \brief Cuda specific function to attached texture object to an allocation.
|
||||||
|
* Output the texture object, base pointer, and offset from the input pointer.
|
||||||
|
*/
|
||||||
|
#if defined( __CUDACC__ )
|
||||||
|
static void texture_object_attach( Impl::AllocationTracker const & tracker
|
||||||
|
, unsigned type_size
|
||||||
|
, ::cudaChannelFormatDesc const & desc
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
|
||||||
|
CudaSpace();
|
||||||
|
CudaSpace( const CudaSpace & rhs ) = default ;
|
||||||
|
CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
|
||||||
|
~CudaSpace() = default ;
|
||||||
|
|
||||||
|
/**\brief Allocate memory in the cuda space */
|
||||||
|
void * allocate( const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
/**\brief Deallocate memory in the cuda space */
|
||||||
|
void deallocate( void * const arg_alloc_ptr
|
||||||
|
, const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
/** \brief Error reporting for HostSpace attempt to access CudaSpace */
|
||||||
|
static void access_error();
|
||||||
|
static void access_error( const void * const );
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
int m_device ; ///< Which Cuda device
|
||||||
|
|
||||||
|
// friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
/// \brief Initialize lock array for arbitrary size atomics.
|
||||||
|
///
|
||||||
|
/// Arbitrary atomics are implemented using a hash table of locks
|
||||||
|
/// where the hash value is derived from the address of the
|
||||||
|
/// object for which an atomic operation is performed.
|
||||||
|
/// This function initializes the locks to zero (unset).
|
||||||
|
void init_lock_array_cuda_space();
|
||||||
|
|
||||||
|
/// \brief Retrieve the pointer to the lock array for arbitrary size atomics.
|
||||||
|
///
|
||||||
|
/// Arbitrary atomics are implemented using a hash table of locks
|
||||||
|
/// where the hash value is derived from the address of the
|
||||||
|
/// object for which an atomic operation is performed.
|
||||||
|
/// This function retrieves the lock array pointer.
|
||||||
|
/// If the array is not yet allocated it will do so.
|
||||||
|
int* lock_array_cuda_space_ptr(bool deallocate = false);
|
||||||
|
}
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Cuda memory that is accessible to Host execution space
|
||||||
|
* through Cuda's unified virtual memory (UVM) runtime.
|
||||||
|
*/
|
||||||
|
class CudaUVMSpace {
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos memory space
|
||||||
|
typedef CudaUVMSpace memory_space ;
|
||||||
|
typedef Cuda execution_space ;
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
typedef unsigned int size_type ;
|
||||||
|
|
||||||
|
/** \brief If UVM capability is available */
|
||||||
|
static bool available();
|
||||||
|
|
||||||
|
typedef Impl::CudaUVMAllocator allocator;
|
||||||
|
|
||||||
|
/** \brief Allocate a contiguous block of memory.
|
||||||
|
*
|
||||||
|
* The input label is associated with the block of memory.
|
||||||
|
* The block of memory is tracked via reference counting where
|
||||||
|
* allocation gives it a reference count of one.
|
||||||
|
*/
|
||||||
|
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
|
||||||
|
|
||||||
|
|
||||||
|
/** \brief Cuda specific function to attached texture object to an allocation.
|
||||||
|
* Output the texture object, base pointer, and offset from the input pointer.
|
||||||
|
*/
|
||||||
|
#if defined( __CUDACC__ )
|
||||||
|
static void texture_object_attach( Impl::AllocationTracker const & tracker
|
||||||
|
, unsigned type_size
|
||||||
|
, ::cudaChannelFormatDesc const & desc
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
/*--------------------------------*/
|
||||||
|
|
||||||
|
CudaUVMSpace();
|
||||||
|
CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
|
||||||
|
CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
|
||||||
|
~CudaUVMSpace() = default ;
|
||||||
|
|
||||||
|
/**\brief Allocate memory in the cuda space */
|
||||||
|
void * allocate( const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
/**\brief Deallocate memory in the cuda space */
|
||||||
|
void deallocate( void * const arg_alloc_ptr
|
||||||
|
, const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
int m_device ; ///< Which Cuda device
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Host memory that is accessible to Cuda execution space
|
||||||
|
* through Cuda's host-pinned memory allocation.
|
||||||
|
*/
|
||||||
|
class CudaHostPinnedSpace {
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos memory space
|
||||||
|
/** \brief Memory is in HostSpace so use the HostSpace::execution_space */
|
||||||
|
typedef HostSpace::execution_space execution_space ;
|
||||||
|
typedef CudaHostPinnedSpace memory_space ;
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
typedef unsigned int size_type ;
|
||||||
|
|
||||||
|
|
||||||
|
typedef Impl::CudaHostAllocator allocator ;
|
||||||
|
|
||||||
|
/** \brief Allocate a contiguous block of memory.
|
||||||
|
*
|
||||||
|
* The input label is associated with the block of memory.
|
||||||
|
* The block of memory is tracked via reference counting where
|
||||||
|
* allocation gives it a reference count of one.
|
||||||
|
*/
|
||||||
|
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
|
||||||
|
CudaHostPinnedSpace();
|
||||||
|
CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
|
||||||
|
CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
|
||||||
|
~CudaHostPinnedSpace() = default ;
|
||||||
|
|
||||||
|
/**\brief Allocate memory in the cuda space */
|
||||||
|
void * allocate( const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
/**\brief Deallocate memory in the cuda space */
|
||||||
|
void deallocate( void * const arg_alloc_ptr
|
||||||
|
, const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaSpace , CudaSpace >
|
||||||
|
{
|
||||||
|
DeepCopy( void * dst , const void * src , size_t );
|
||||||
|
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaSpace , HostSpace >
|
||||||
|
{
|
||||||
|
DeepCopy( void * dst , const void * src , size_t );
|
||||||
|
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< HostSpace , CudaSpace >
|
||||||
|
{
|
||||||
|
DeepCopy( void * dst , const void * src , size_t );
|
||||||
|
DeepCopy( const Cuda & , void * dst , const void * src , size_t );
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaSpace , CudaUVMSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< CudaSpace , CudaSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaSpace , CudaHostPinnedSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< CudaSpace , HostSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaUVMSpace , CudaSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< CudaSpace , CudaSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaUVMSpace , CudaUVMSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< CudaSpace , CudaSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< CudaSpace , HostSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaUVMSpace , HostSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< CudaSpace , HostSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaHostPinnedSpace , CudaSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< HostSpace , CudaSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< HostSpace , CudaSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< HostSpace , HostSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< CudaHostPinnedSpace , HostSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< HostSpace , HostSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<> struct DeepCopy< HostSpace , CudaUVMSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< HostSpace , CudaSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct DeepCopy< HostSpace , CudaHostPinnedSpace >
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n )
|
||||||
|
{ (void) DeepCopy< HostSpace , HostSpace >( dst , src , n ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/** Running in CudaSpace attempting to access HostSpace: error */
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
|
||||||
|
{
|
||||||
|
enum { value = false };
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( void )
|
||||||
|
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( const void * )
|
||||||
|
{ Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Running in CudaSpace accessing CudaUVMSpace: ok */
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Running in CudaSpace attempting to access an unknown space: error */
|
||||||
|
template< class OtherSpace >
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace<
|
||||||
|
typename enable_if< ! is_same<Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
|
||||||
|
OtherSpace >
|
||||||
|
{
|
||||||
|
enum { value = false };
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( void )
|
||||||
|
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( const void * )
|
||||||
|
{ Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** Running in HostSpace attempting to access CudaSpace */
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
|
||||||
|
{
|
||||||
|
enum { value = false };
|
||||||
|
inline static void verify( void ) { CudaSpace::access_error(); }
|
||||||
|
inline static void verify( const void * p ) { CudaSpace::access_error(p); }
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Running in HostSpace accessing CudaUVMSpace is OK */
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
inline static void verify( void ) { }
|
||||||
|
inline static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Running in HostSpace accessing CudaHostPinnedSpace is OK */
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( void ) {}
|
||||||
|
KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class SharedAllocationRecord< Kokkos::CudaSpace , void >
|
||||||
|
: public SharedAllocationRecord< void , void >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;
|
||||||
|
|
||||||
|
typedef SharedAllocationRecord< void , void > RecordBase ;
|
||||||
|
|
||||||
|
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
|
||||||
|
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
|
||||||
|
|
||||||
|
static void deallocate( RecordBase * );
|
||||||
|
|
||||||
|
static ::cudaTextureObject_t
|
||||||
|
attach_texture_object( const unsigned sizeof_alias
|
||||||
|
, void * const alloc_ptr
|
||||||
|
, const size_t alloc_size );
|
||||||
|
|
||||||
|
static RecordBase s_root_record ;
|
||||||
|
|
||||||
|
::cudaTextureObject_t m_tex_obj ;
|
||||||
|
const Kokkos::CudaSpace m_space ;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
~SharedAllocationRecord();
|
||||||
|
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
|
||||||
|
|
||||||
|
SharedAllocationRecord( const Kokkos::CudaSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
, const RecordBase::function_type arg_dealloc = & deallocate
|
||||||
|
);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
std::string get_label() const ;
|
||||||
|
|
||||||
|
static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
);
|
||||||
|
|
||||||
|
template< typename AliasType >
|
||||||
|
inline
|
||||||
|
::cudaTextureObject_t attach_texture_object()
|
||||||
|
{
|
||||||
|
static_assert( ( std::is_same< AliasType , int >::value ||
|
||||||
|
std::is_same< AliasType , ::int2 >::value ||
|
||||||
|
std::is_same< AliasType , ::int4 >::value )
|
||||||
|
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
|
||||||
|
|
||||||
|
if ( m_tex_obj == 0 ) {
|
||||||
|
m_tex_obj = attach_texture_object( sizeof(AliasType)
|
||||||
|
, (void*) RecordBase::m_alloc_ptr
|
||||||
|
, RecordBase::m_alloc_size );
|
||||||
|
}
|
||||||
|
|
||||||
|
return m_tex_obj ;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename AliasType >
|
||||||
|
inline
|
||||||
|
int attach_texture_object_offset( const AliasType * const ptr )
|
||||||
|
{
|
||||||
|
// Texture object is attached to the entire allocation range
|
||||||
|
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
|
||||||
|
}
|
||||||
|
|
||||||
|
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
|
||||||
|
|
||||||
|
static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
|
||||||
|
: public SharedAllocationRecord< void , void >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef SharedAllocationRecord< void , void > RecordBase ;
|
||||||
|
|
||||||
|
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
|
||||||
|
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
|
||||||
|
|
||||||
|
static void deallocate( RecordBase * );
|
||||||
|
|
||||||
|
static RecordBase s_root_record ;
|
||||||
|
|
||||||
|
::cudaTextureObject_t m_tex_obj ;
|
||||||
|
const Kokkos::CudaUVMSpace m_space ;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
~SharedAllocationRecord();
|
||||||
|
SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
|
||||||
|
|
||||||
|
SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
, const RecordBase::function_type arg_dealloc = & deallocate
|
||||||
|
);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
std::string get_label() const ;
|
||||||
|
|
||||||
|
static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
);
|
||||||
|
|
||||||
|
template< typename AliasType >
|
||||||
|
inline
|
||||||
|
::cudaTextureObject_t attach_texture_object()
|
||||||
|
{
|
||||||
|
static_assert( ( std::is_same< AliasType , int >::value ||
|
||||||
|
std::is_same< AliasType , ::int2 >::value ||
|
||||||
|
std::is_same< AliasType , ::int4 >::value )
|
||||||
|
, "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
|
||||||
|
|
||||||
|
if ( m_tex_obj == 0 ) {
|
||||||
|
m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
|
||||||
|
attach_texture_object( sizeof(AliasType)
|
||||||
|
, (void*) RecordBase::m_alloc_ptr
|
||||||
|
, RecordBase::m_alloc_size );
|
||||||
|
}
|
||||||
|
|
||||||
|
return m_tex_obj ;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename AliasType >
|
||||||
|
inline
|
||||||
|
int attach_texture_object_offset( const AliasType * const ptr )
|
||||||
|
{
|
||||||
|
// Texture object is attached to the entire allocation range
|
||||||
|
return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
|
||||||
|
}
|
||||||
|
|
||||||
|
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
|
||||||
|
|
||||||
|
static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
|
||||||
|
: public SharedAllocationRecord< void , void >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef SharedAllocationRecord< void , void > RecordBase ;
|
||||||
|
|
||||||
|
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
|
||||||
|
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
|
||||||
|
|
||||||
|
static void deallocate( RecordBase * );
|
||||||
|
|
||||||
|
static RecordBase s_root_record ;
|
||||||
|
|
||||||
|
const Kokkos::CudaHostPinnedSpace m_space ;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
~SharedAllocationRecord();
|
||||||
|
SharedAllocationRecord() : RecordBase(), m_space() {}
|
||||||
|
|
||||||
|
SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
, const RecordBase::function_type arg_dealloc = & deallocate
|
||||||
|
);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
std::string get_label() const ;
|
||||||
|
|
||||||
|
static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
);
|
||||||
|
|
||||||
|
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
|
||||||
|
|
||||||
|
static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Experimental
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||||
|
#endif /* #define KOKKOS_CUDASPACE_HPP */
|
||||||
|
|
||||||
497
lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
Executable file
497
lib/kokkos/core/src/Kokkos_ExecPolicy.hpp
Executable file
@ -0,0 +1,497 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_EXECPOLICY_HPP
|
||||||
|
#define KOKKOS_EXECPOLICY_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_StaticAssert.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Execution policy for work over a range of an integral type.
|
||||||
|
*
|
||||||
|
* Valid template argument options:
|
||||||
|
*
|
||||||
|
* With a specified execution space:
|
||||||
|
* < ExecSpace , WorkTag , { IntConst | IntType } >
|
||||||
|
* < ExecSpace , WorkTag , void >
|
||||||
|
* < ExecSpace , { IntConst | IntType } , void >
|
||||||
|
* < ExecSpace , void , void >
|
||||||
|
*
|
||||||
|
* With the default execution space:
|
||||||
|
* < WorkTag , { IntConst | IntType } , void >
|
||||||
|
* < WorkTag , void , void >
|
||||||
|
* < { IntConst | IntType } , void , void >
|
||||||
|
* < void , void , void >
|
||||||
|
*
|
||||||
|
* IntType is a fundamental integral type
|
||||||
|
* IntConst is an Impl::integral_constant< IntType , Blocking >
|
||||||
|
*
|
||||||
|
* Blocking is the granularity of partitioning the range among threads.
|
||||||
|
*/
|
||||||
|
template< class Arg0 = void , class Arg1 = void , class Arg2 = void
|
||||||
|
, class ExecSpace =
|
||||||
|
// The first argument is the execution space,
|
||||||
|
// otherwise use the default execution space.
|
||||||
|
typename Impl::if_c< Impl::is_execution_space< Arg0 >::value , Arg0
|
||||||
|
, Kokkos::DefaultExecutionSpace >::type
|
||||||
|
>
|
||||||
|
class RangePolicy {
|
||||||
|
private:
|
||||||
|
|
||||||
|
// Default integral type and blocking factor:
|
||||||
|
typedef int DefaultIntType ;
|
||||||
|
enum { DefaultIntValue = 8 };
|
||||||
|
|
||||||
|
enum { Arg0_Void = Impl::is_same< Arg0 , void >::value };
|
||||||
|
enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
|
||||||
|
enum { Arg2_Void = Impl::is_same< Arg2 , void >::value };
|
||||||
|
|
||||||
|
enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
|
||||||
|
|
||||||
|
enum { Arg0_IntConst = Impl::is_integral_constant< Arg0 >::value };
|
||||||
|
enum { Arg1_IntConst = Impl::is_integral_constant< Arg1 >::value };
|
||||||
|
enum { Arg2_IntConst = Impl::is_integral_constant< Arg2 >::value };
|
||||||
|
|
||||||
|
enum { Arg0_IntType = Impl::is_integral< Arg0 >::value };
|
||||||
|
enum { Arg1_IntType = Impl::is_integral< Arg1 >::value };
|
||||||
|
enum { Arg2_IntType = Impl::is_integral< Arg2 >::value };
|
||||||
|
|
||||||
|
enum { Arg0_WorkTag = ! Arg0_ExecSpace && ! Arg0_IntConst && ! Arg0_IntType && ! Arg0_Void };
|
||||||
|
enum { Arg1_WorkTag = Arg0_ExecSpace && ! Arg1_IntConst && ! Arg1_IntType && ! Arg1_Void };
|
||||||
|
|
||||||
|
enum { ArgOption_OK = Impl::StaticAssert< (
|
||||||
|
( Arg0_ExecSpace && Arg1_WorkTag && ( Arg2_IntConst || Arg2_IntType ) ) ||
|
||||||
|
( Arg0_ExecSpace && Arg1_WorkTag && Arg2_Void ) ||
|
||||||
|
( Arg0_ExecSpace && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
|
||||||
|
( Arg0_ExecSpace && Arg1_Void && Arg2_Void ) ||
|
||||||
|
( Arg0_WorkTag && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
|
||||||
|
( Arg0_WorkTag && Arg1_Void && Arg2_Void ) ||
|
||||||
|
( ( Arg0_IntConst || Arg0_IntType ) && Arg1_Void && Arg2_Void ) ||
|
||||||
|
( Arg0_Void && Arg1_Void && Arg2_Void )
|
||||||
|
) >::value };
|
||||||
|
|
||||||
|
// The work argument tag is the first or second argument
|
||||||
|
typedef typename Impl::if_c< Arg0_WorkTag , Arg0 ,
|
||||||
|
typename Impl::if_c< Arg1_WorkTag , Arg1 , void
|
||||||
|
>::type >::type
|
||||||
|
WorkTag ;
|
||||||
|
|
||||||
|
enum { Granularity = Arg0_IntConst ? unsigned(Impl::is_integral_constant<Arg0>::integral_value) : (
|
||||||
|
Arg1_IntConst ? unsigned(Impl::is_integral_constant<Arg1>::integral_value) : (
|
||||||
|
Arg2_IntConst ? unsigned(Impl::is_integral_constant<Arg2>::integral_value) : (
|
||||||
|
unsigned(DefaultIntValue) ))) };
|
||||||
|
|
||||||
|
// Only accept the integral type if the blocking is a power of two
|
||||||
|
typedef typename Impl::enable_if< Impl::is_power_of_two< Granularity >::value ,
|
||||||
|
typename Impl::if_c< Arg0_IntType , Arg0 ,
|
||||||
|
typename Impl::if_c< Arg1_IntType , Arg1 ,
|
||||||
|
typename Impl::if_c< Arg2_IntType , Arg2 ,
|
||||||
|
typename Impl::if_c< Arg0_IntConst , typename Impl::is_integral_constant<Arg0>::integral_type ,
|
||||||
|
typename Impl::if_c< Arg1_IntConst , typename Impl::is_integral_constant<Arg1>::integral_type ,
|
||||||
|
typename Impl::if_c< Arg2_IntConst , typename Impl::is_integral_constant<Arg2>::integral_type ,
|
||||||
|
DefaultIntType
|
||||||
|
>::type >::type >::type
|
||||||
|
>::type >::type >::type
|
||||||
|
>::type
|
||||||
|
IntType ;
|
||||||
|
|
||||||
|
enum { GranularityMask = IntType(Granularity) - 1 };
|
||||||
|
|
||||||
|
ExecSpace m_space ;
|
||||||
|
IntType m_begin ;
|
||||||
|
IntType m_end ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as an execution policy
|
||||||
|
typedef ExecSpace execution_space ;
|
||||||
|
typedef RangePolicy execution_policy ;
|
||||||
|
typedef WorkTag work_tag ;
|
||||||
|
typedef IntType member_type ;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION const execution_space & space() const { return m_space ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
|
||||||
|
|
||||||
|
inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
|
||||||
|
|
||||||
|
/** \brief Total range */
|
||||||
|
inline
|
||||||
|
RangePolicy( const member_type work_begin
|
||||||
|
, const member_type work_end
|
||||||
|
)
|
||||||
|
: m_space()
|
||||||
|
, m_begin( work_begin < work_end ? work_begin : 0 )
|
||||||
|
, m_end( work_begin < work_end ? work_end : 0 )
|
||||||
|
{}
|
||||||
|
|
||||||
|
/** \brief Total range */
|
||||||
|
inline
|
||||||
|
RangePolicy( const execution_space & work_space
|
||||||
|
, const member_type work_begin
|
||||||
|
, const member_type work_end
|
||||||
|
)
|
||||||
|
: m_space( work_space )
|
||||||
|
, m_begin( work_begin < work_end ? work_begin : 0 )
|
||||||
|
, m_end( work_begin < work_end ? work_end : 0 )
|
||||||
|
{}
|
||||||
|
|
||||||
|
/** \brief Subrange for a partition's rank and size.
|
||||||
|
*
|
||||||
|
* Typically used to partition a range over a group of threads.
|
||||||
|
*/
|
||||||
|
struct WorkRange {
|
||||||
|
typedef RangePolicy::work_tag work_tag ;
|
||||||
|
typedef RangePolicy::member_type member_type ;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION member_type end() const { return m_end ; }
|
||||||
|
|
||||||
|
/** \brief Subrange for a partition's rank and size.
|
||||||
|
*
|
||||||
|
* Typically used to partition a range over a group of threads.
|
||||||
|
*/
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
WorkRange( const RangePolicy & range
|
||||||
|
, const int part_rank
|
||||||
|
, const int part_size
|
||||||
|
)
|
||||||
|
: m_begin(0), m_end(0)
|
||||||
|
{
|
||||||
|
if ( part_size ) {
|
||||||
|
|
||||||
|
// Split evenly among partitions, then round up to the granularity.
|
||||||
|
const member_type work_part =
|
||||||
|
( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
|
||||||
|
+ GranularityMask ) & ~member_type(GranularityMask);
|
||||||
|
|
||||||
|
m_begin = range.begin() + work_part * part_rank ;
|
||||||
|
m_end = m_begin + work_part ;
|
||||||
|
|
||||||
|
if ( range.end() < m_begin ) m_begin = range.end() ;
|
||||||
|
if ( range.end() < m_end ) m_end = range.end() ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
member_type m_begin ;
|
||||||
|
member_type m_end ;
|
||||||
|
WorkRange();
|
||||||
|
WorkRange & operator = ( const WorkRange & );
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Execution policy for parallel work over a league of teams of threads.
|
||||||
|
*
|
||||||
|
* The work functor is called for each thread of each team such that
|
||||||
|
* the team's member threads are guaranteed to be concurrent.
|
||||||
|
*
|
||||||
|
* The team's threads have access to team shared scratch memory and
|
||||||
|
* team collective operations.
|
||||||
|
*
|
||||||
|
* If the WorkTag is non-void then the first calling argument of the
|
||||||
|
* work functor's parentheses operator is 'const WorkTag &'.
|
||||||
|
* This allows a functor to have multiple work member functions.
|
||||||
|
*
|
||||||
|
* template argument option with specified execution space:
|
||||||
|
* < ExecSpace , WorkTag >
|
||||||
|
* < ExecSpace , void >
|
||||||
|
*
|
||||||
|
* template argument option with default execution space:
|
||||||
|
* < WorkTag , void >
|
||||||
|
* < void , void >
|
||||||
|
*/
|
||||||
|
template< class Arg0 = void
|
||||||
|
, class Arg1 = void
|
||||||
|
, class ExecSpace =
|
||||||
|
// If the first argument is not an execution space
|
||||||
|
// then use the default execution space.
|
||||||
|
typename Impl::if_c< Impl::is_execution_space< Arg0 >::value , Arg0
|
||||||
|
, Kokkos::DefaultExecutionSpace >::type
|
||||||
|
>
|
||||||
|
class TeamPolicy {
|
||||||
|
private:
|
||||||
|
|
||||||
|
enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
|
||||||
|
enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
|
||||||
|
enum { ArgOption_OK = Impl::StaticAssert< ( Arg0_ExecSpace || Arg1_Void ) >::value };
|
||||||
|
|
||||||
|
typedef typename Impl::if_c< Arg0_ExecSpace , Arg1 , Arg0 >::type WorkTag ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as an execution policy
|
||||||
|
typedef TeamPolicy execution_policy ;
|
||||||
|
typedef ExecSpace execution_space ;
|
||||||
|
typedef WorkTag work_tag ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
/** \brief Query maximum team size for a given functor.
|
||||||
|
*
|
||||||
|
* This size takes into account execution space concurrency limitations and
|
||||||
|
* scratch memory space limitations for reductions, team reduce/scan, and
|
||||||
|
* team shared memory.
|
||||||
|
*/
|
||||||
|
template< class FunctorType >
|
||||||
|
static int team_size_max( const FunctorType & );
|
||||||
|
|
||||||
|
/** \brief Query recommended team size for a given functor.
|
||||||
|
*
|
||||||
|
* This size takes into account execution space concurrency limitations and
|
||||||
|
* scratch memory space limitations for reductions, team reduce/scan, and
|
||||||
|
* team shared memory.
|
||||||
|
*/
|
||||||
|
template< class FunctorType >
|
||||||
|
static int team_size_recommended( const FunctorType & );
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
static int team_size_recommended( const FunctorType & , const int&);
|
||||||
|
//----------------------------------------
|
||||||
|
/** \brief Construct policy with the given instance of the execution space */
|
||||||
|
TeamPolicy( const execution_space & , int league_size_request , int team_size_request );
|
||||||
|
|
||||||
|
/** \brief Construct policy with the default instance of the execution space */
|
||||||
|
TeamPolicy( int league_size_request , int team_size_request );
|
||||||
|
|
||||||
|
/** \brief The actual league size (number of teams) of the policy.
|
||||||
|
*
|
||||||
|
* This may be smaller than the requested league size due to limitations
|
||||||
|
* of the execution space.
|
||||||
|
*/
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_size() const ;
|
||||||
|
|
||||||
|
/** \brief The actual team size (number of threads per team) of the policy.
|
||||||
|
*
|
||||||
|
* This may be smaller than the requested team size due to limitations
|
||||||
|
* of the execution space.
|
||||||
|
*/
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_size() const ;
|
||||||
|
|
||||||
|
/** \brief Parallel execution of a functor calls the functor once with
|
||||||
|
* each member of the execution policy.
|
||||||
|
*/
|
||||||
|
struct member_type {
|
||||||
|
|
||||||
|
/** \brief Handle to the currently executing team shared scratch memory */
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
typename execution_space::scratch_memory_space team_shmem() const ;
|
||||||
|
|
||||||
|
/** \brief Rank of this team within the league of teams */
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_rank() const ;
|
||||||
|
|
||||||
|
/** \brief Number of teams in the league */
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_size() const ;
|
||||||
|
|
||||||
|
/** \brief Rank of this thread within this team */
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_rank() const ;
|
||||||
|
|
||||||
|
/** \brief Number of threads in this team */
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_size() const ;
|
||||||
|
|
||||||
|
/** \brief Barrier among the threads of this team */
|
||||||
|
KOKKOS_INLINE_FUNCTION void team_barrier() const ;
|
||||||
|
|
||||||
|
/** \brief Intra-team reduction. Returns join of all values of the team members. */
|
||||||
|
template< class JoinOp >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
typename JoinOp::value_type team_reduce( const typename JoinOp::value_type
|
||||||
|
, const JoinOp & ) const ;
|
||||||
|
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
|
||||||
|
*
|
||||||
|
* The highest rank thread can compute the reduction total as
|
||||||
|
* reduction_total = dev.team_scan( value ) + value ;
|
||||||
|
*/
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const ;
|
||||||
|
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
|
||||||
|
* with intra-team non-deterministic ordering accumulation.
|
||||||
|
*
|
||||||
|
* The global inter-team accumulation value will, at the end of the
|
||||||
|
* league's parallel execution, be the scan's total.
|
||||||
|
* Parallel execution ordering of the league's teams is non-deterministic.
|
||||||
|
* As such the base value for each team's scan operation is similarly
|
||||||
|
* non-deterministic.
|
||||||
|
*/
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const ;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/** \brief  Bounds of the slice of an index range assigned to one team member.
 *
 *  The range [arg_begin, arg_end) is cut into team_size() consecutive chunks
 *  of ceil( (arg_end - arg_begin) / team_size() ) indices; the member with
 *  rank team_rank() receives the chunk at that position.  The slice's upper
 *  bound never exceeds arg_end.
 */
template<typename iType, class TeamMemberType>
struct TeamThreadRangeBoundariesStruct {
private:

  // First index of the chunk belonging to rank 'arg_rank'.
  KOKKOS_INLINE_FUNCTION static
  iType ibegin( const iType & arg_begin
              , const iType & arg_end
              , const iType & arg_rank
              , const iType & arg_size
              )
  {
    return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ;
  }

  // Upper bound of the chunk belonging to rank 'arg_rank', limited to arg_end.
  KOKKOS_INLINE_FUNCTION static
  iType iend( const iType & arg_begin
            , const iType & arg_end
            , const iType & arg_rank
            , const iType & arg_size
            )
  {
    const iType chunk_stop = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 );

    if ( chunk_stop < arg_end ) {
      return chunk_stop ;
    }
    return arg_end ;
  }

public:

  typedef iType index_type;

  const iType start;              // lower bound of this member's slice
  const iType end;                // upper bound of this member's slice
  enum {increment = 1};           // step between consecutive indices
  const TeamMemberType& thread;   // the team member the slice belongs to (held by reference)

  // Slice of the range that starts at zero and stops at 'arg_end'.
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
                                 , const iType& arg_end
                                 )
    : start(  ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , end(    iend(   0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , thread( arg_thread )
  {}

  // Slice of the range that starts at 'arg_begin' and stops at 'arg_end'.
  KOKKOS_INLINE_FUNCTION
  TeamThreadRangeBoundariesStruct( const TeamMemberType& arg_thread
                                 , const iType& arg_begin
                                 , const iType& arg_end
                                 )
    : start(  ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , end(    iend(   arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
    , thread( arg_thread )
  {}
};
|
||||||
|
|
||||||
|
/** \brief  Bounds of a vector-level loop: starts at zero, stops at 'count',
 *          advances by one.  The team member argument is accepted but not stored.
 */
template<typename iType, class TeamMemberType>
struct ThreadVectorRangeBoundariesStruct {

  typedef iType index_type;

  enum {start = 0};       // lower bound is fixed at compile time
  const iType end;        // upper bound, taken from the constructor's count
  enum {increment = 1};   // step between consecutive indices

  KOKKOS_INLINE_FUNCTION
  ThreadVectorRangeBoundariesStruct( const TeamMemberType & /* thread */
                                   , const iType & count )
    : end( count )
  {}
};
|
||||||
|
|
||||||
|
/** \brief  Wrapper that holds a reference to a team member handle. */
template<class TeamMemberType>
struct ThreadSingleStruct {

  // Held by reference: the wrapped member must outlive this object.
  const TeamMemberType& team_member;

  KOKKOS_INLINE_FUNCTION
  ThreadSingleStruct( const TeamMemberType& member )
    : team_member( member )
  {}
};
|
||||||
|
|
||||||
|
/** \brief  Wrapper that holds a reference to a team member handle. */
template<class TeamMemberType>
struct VectorSingleStruct {

  // Held by reference: the wrapped member must outlive this object.
  const TeamMemberType& team_member;

  KOKKOS_INLINE_FUNCTION
  VectorSingleStruct( const TeamMemberType& member )
    : team_member( member )
  {}
};
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
/** \brief Execution policy for parallel work over the threads within a team.
|
||||||
|
*
|
||||||
|
* The range is split over all threads in a team. The Mapping scheme depends on the architecture.
|
||||||
|
* This policy is used together with a parallel pattern as a nested layer within a kernel launched
|
||||||
|
* with the TeamPolicy. This variant expects a single count. So the range is [0,count).
|
||||||
|
*/
|
||||||
|
template<typename iType, class TeamMemberType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& count);
|
||||||
|
|
||||||
|
/** \brief Execution policy for parallel work over the threads within a team.
|
||||||
|
*
|
||||||
|
* The range is split over all threads in a team. The Mapping scheme depends on the architecture.
|
||||||
|
* This policy is used together with a parallel pattern as a nested layer within a kernel launched
|
||||||
|
* with the TeamPolicy. This variant expects a begin and end. So the range is [begin,end).
|
||||||
|
*/
|
||||||
|
template<typename iType, class TeamMemberType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType> TeamThreadRange(const TeamMemberType&, const iType& begin, const iType& end);
|
||||||
|
|
||||||
|
/** \brief Execution policy for a vector parallel loop.
|
||||||
|
*
|
||||||
|
* The range is split over all vector lanes in a thread. The Mapping scheme depends on the architecture.
|
||||||
|
* This policy is used together with a parallel pattern as a nested layer within a kernel launched
|
||||||
|
* with the TeamPolicy. This variant expects a single count. So the range is [0,count).
|
||||||
|
*/
|
||||||
|
template<typename iType, class TeamMemberType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType> ThreadVectorRange(const TeamMemberType&, const iType& count);
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif /* #define KOKKOS_EXECPOLICY_HPP */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
270
lib/kokkos/core/src/Kokkos_HostSpace.hpp
Executable file
270
lib/kokkos/core/src/Kokkos_HostSpace.hpp
Executable file
@ -0,0 +1,270 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_HOSTSPACE_HPP
|
||||||
|
#define KOKKOS_HOSTSPACE_HPP
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <string>
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <typeinfo>
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
#include <Kokkos_MemoryTraits.hpp>
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp>
|
||||||
|
#include <impl/Kokkos_BasicAllocators.hpp>
|
||||||
|
|
||||||
|
#include <impl/KokkosExp_SharedAlloc.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/// \brief Initialize lock array for arbitrary size atomics.
|
||||||
|
///
|
||||||
|
/// Arbitrary atomics are implemented using a hash table of locks
|
||||||
|
/// where the hash value is derived from the address of the
|
||||||
|
/// object for which an atomic operation is performed.
|
||||||
|
/// This function initializes the locks to zero (unset).
|
||||||
|
void init_lock_array_host_space();
|
||||||
|
|
||||||
|
/// \brief Acquire a lock for the address
|
||||||
|
///
|
||||||
|
/// This function tries to acquire the lock for the hash value derived
|
||||||
|
/// from the provided ptr. If the lock is successfully acquired the
|
||||||
|
/// function returns true. Otherwise it returns false.
|
||||||
|
bool lock_address_host_space(void* ptr);
|
||||||
|
|
||||||
|
/// \brief Release lock for the address
|
||||||
|
///
|
||||||
|
/// This function releases the lock for the hash value derived
|
||||||
|
/// from the provided ptr. This function should only be called
|
||||||
|
/// after previously successfully acquiring a lock with
|
||||||
|
/// lock_address_host_space.
|
||||||
|
void unlock_address_host_space(void* ptr);
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/// \class HostSpace
|
||||||
|
/// \brief Memory management for host memory.
|
||||||
|
///
|
||||||
|
/// HostSpace is a memory space that governs host memory. "Host"
|
||||||
|
/// memory means the usual CPU-accessible memory.
|
||||||
|
class HostSpace {
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos memory space
|
||||||
|
typedef HostSpace memory_space ;
|
||||||
|
typedef size_t size_type ;
|
||||||
|
|
||||||
|
/// \typedef execution_space
|
||||||
|
/// \brief Default execution space for this memory space.
|
||||||
|
///
|
||||||
|
/// Every memory space has a default execution space. This is
|
||||||
|
/// useful for things like initializing a View (which happens in
|
||||||
|
/// parallel using the View's default execution space).
|
||||||
|
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||||
|
typedef Kokkos::OpenMP execution_space ;
|
||||||
|
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||||
|
typedef Kokkos::Threads execution_space ;
|
||||||
|
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||||
|
typedef Kokkos::OpenMP execution_space ;
|
||||||
|
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||||
|
typedef Kokkos::Threads execution_space ;
|
||||||
|
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||||
|
typedef Kokkos::Serial execution_space ;
|
||||||
|
#else
|
||||||
|
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//! This memory space preferred device_type
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
|
||||||
|
#if defined( KOKKOS_USE_PAGE_ALIGNED_HOST_MEMORY )
|
||||||
|
typedef Impl::PageAlignedAllocator allocator ;
|
||||||
|
#else
|
||||||
|
typedef Impl::AlignedAllocator allocator ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** \brief Allocate a contiguous block of memory.
|
||||||
|
*
|
||||||
|
* The input label is associated with the block of memory.
|
||||||
|
* The block of memory is tracked via reference counting where
|
||||||
|
* allocation gives it a reference count of one.
|
||||||
|
*/
|
||||||
|
static Impl::AllocationTracker allocate_and_track( const std::string & label, const size_t size );
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
/* Functions unique to the HostSpace */
|
||||||
|
static int in_parallel();
|
||||||
|
|
||||||
|
static void register_in_parallel( int (*)() );
|
||||||
|
|
||||||
|
/*--------------------------------*/
|
||||||
|
|
||||||
|
/**\brief Default memory space instance */
|
||||||
|
HostSpace();
|
||||||
|
HostSpace( const HostSpace & rhs ) = default ;
|
||||||
|
HostSpace & operator = ( const HostSpace & ) = default ;
|
||||||
|
~HostSpace() = default ;
|
||||||
|
|
||||||
|
/**\brief Non-default memory space instance to choose allocation mechansim, if available */
|
||||||
|
|
||||||
|
enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC };
|
||||||
|
|
||||||
|
explicit
|
||||||
|
HostSpace( const AllocationMechanism & );
|
||||||
|
|
||||||
|
/**\brief Allocate memory in the host space */
|
||||||
|
void * allocate( const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
/**\brief Deallocate memory in the host space */
|
||||||
|
void deallocate( void * const arg_alloc_ptr
|
||||||
|
, const size_t arg_alloc_size ) const ;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
AllocationMechanism m_alloc_mech ;
|
||||||
|
|
||||||
|
friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
class SharedAllocationRecord< Kokkos::HostSpace , void >
|
||||||
|
: public SharedAllocationRecord< void , void >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
friend Kokkos::HostSpace ;
|
||||||
|
|
||||||
|
typedef SharedAllocationRecord< void , void > RecordBase ;
|
||||||
|
|
||||||
|
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
|
||||||
|
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
|
||||||
|
|
||||||
|
static void deallocate( RecordBase * );
|
||||||
|
|
||||||
|
/**\brief Root record for tracked allocations from this HostSpace instance */
|
||||||
|
static RecordBase s_root_record ;
|
||||||
|
|
||||||
|
const Kokkos::HostSpace m_space ;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
~SharedAllocationRecord();
|
||||||
|
SharedAllocationRecord() = default ;
|
||||||
|
|
||||||
|
SharedAllocationRecord( const Kokkos::HostSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
, const RecordBase::function_type arg_dealloc = & deallocate
|
||||||
|
);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
inline
|
||||||
|
std::string get_label() const
|
||||||
|
{
|
||||||
|
return std::string( RecordBase::head()->m_label );
|
||||||
|
}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static
|
||||||
|
SharedAllocationRecord * allocate( const Kokkos::HostSpace & arg_space
|
||||||
|
, const std::string & arg_label
|
||||||
|
, const size_t arg_alloc_size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size );
|
||||||
|
#else
|
||||||
|
return (SharedAllocationRecord *) 0 ;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
|
||||||
|
|
||||||
|
static void print_records( std::ostream & , const Kokkos::HostSpace & , bool detail = false );
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Experimental
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class , class > struct DeepCopy ;
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct DeepCopy<HostSpace,HostSpace> {
|
||||||
|
DeepCopy( void * dst , const void * src , size_t n );
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* #define KOKKOS_HOSTSPACE_HPP */
|
||||||
|
|
||||||
174
lib/kokkos/core/src/Kokkos_Layout.hpp
Executable file
174
lib/kokkos/core/src/Kokkos_Layout.hpp
Executable file
@ -0,0 +1,174 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// \file Kokkos_Layout.hpp
|
||||||
|
/// \brief Declaration of various \c MemoryLayout options.
|
||||||
|
|
||||||
|
#ifndef KOKKOS_LAYOUT_HPP
|
||||||
|
#define KOKKOS_LAYOUT_HPP
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/// \struct LayoutLeft
/// \brief Memory layout tag indicating left-to-right (Fortran scheme)
///   striding of multi-indices.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View.  The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// With "layout left" the leftmost index i0 refers to contiguous
/// access, and strides grow for the dimensions to its right
/// (i1, i2, ...).  This imitates how Fortran stores multi-dimensional
/// arrays; for a two-dimensional array it is also called "column major."
struct LayoutLeft {

  //! Tag this class as a kokkos array layout
  typedef LayoutLeft  array_layout ;

};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/// \struct LayoutRight
/// \brief Memory layout tag indicating right-to-left (C or
///   lexicographical scheme) striding of multi-indices.
///
/// This is an example of a \c MemoryLayout template parameter of
/// View.  The memory layout describes how View maps from a
/// multi-index (i0, i1, ..., ik) to a memory location.
///
/// With "layout right" the rightmost index ik refers to contiguous
/// access, and strides grow for the dimensions to its left.  This
/// imitates how C stores multi-dimensional arrays; for a
/// two-dimensional array it is also called "row major."
struct LayoutRight {

  //! Tag this class as a kokkos array layout
  typedef LayoutRight  array_layout ;

};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/// \struct LayoutStride
/// \brief  Memory layout tag indicating arbitrarily strided
///   multi-index mapping into contiguous memory.
struct LayoutStride {

  //! Tag this class as a kokkos array layout
  typedef LayoutStride array_layout ;

  enum { MAX_RANK = 8 };

  size_t dimension[ MAX_RANK ] ;
  size_t stride[ MAX_RANK ] ;

  /** \brief  Compute strides from ordered dimensions.
   *
   *  Values of order uniquely form the set [0..rank)
   *  and specify ordering of the dimensions.
   *  Order = {0,1,2,...} is LayoutLeft
   *  Order = {...,2,1,0} is LayoutRight
   *
   *  \param rank   Number of dimensions; must lie in [0, MAX_RANK].
   *  \param order  Array of 'rank' entries; only order[0..rank) is read.
   *  \param dimen  Array of 'rank' extents; only dimen[0..rank) is read.
   *
   *  \return A layout whose first 'rank' dimensions are copied from
   *          'dimen' and whose strides follow 'order'.  If 'rank' is out
   *          of range or 'order' is not a permutation of [0..rank), all
   *          dimensions and strides of the result are zero.
   */
  template< typename iTypeOrder , typename iTypeDimen >
  KOKKOS_INLINE_FUNCTION static
  LayoutStride order_dimensions( int const rank
                               , iTypeOrder const * const order
                               , iTypeDimen const * const dimen )
    {
      LayoutStride tmp ;

      for ( int r = 0 ; r < MAX_RANK ; ++r ) {
        tmp.dimension[r] = 0 ;
        tmp.stride[r]    = 0 ;
      }

      // A rank outside [0, MAX_RANK] cannot be stored in the fixed-size arrays.
      bool valid = ( 0 <= rank ) && ( rank <= MAX_RANK );

      if ( valid ) {
        // One bit per required index; every entry of order[0..rank) clears
        // its bit, so the mask ends at zero exactly when each index of
        // [0..rank) occurs.  Entries at positions >= rank are never read.
        int missing = int( 1 << rank ) - 1 ;

        for ( int r = 0 ; r < rank ; ++r ) {
          const long idx = static_cast<long>( order[r] );

          // Reject out-of-range entries before shifting: they would index
          // past the arrays and make the shift below undefined.
          if ( idx < 0 || rank <= idx ) { valid = false ; break ; }

          missing &= ~int( 1 << idx );
        }

        valid = valid && ( 0 == missing );
      }

      if ( valid ) {
        size_t n = 1 ;
        for ( int r = 0 ; r < rank ; ++r ) {
          tmp.stride[ order[r] ] = n ;
          n *= ( dimen[order[r]] );
          tmp.dimension[r] = dimen[r];
        }
      }

      return tmp ;
    }
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/// \struct LayoutTileLeft
|
||||||
|
/// \brief Memory layout tag indicating left-to-right (Fortran scheme)
|
||||||
|
/// striding of multi-indices by tiles.
|
||||||
|
///
|
||||||
|
/// This is an example of a \c MemoryLayout template parameter of
|
||||||
|
/// View. The memory layout describes how View maps from a
|
||||||
|
/// multi-index (i0, i1, ..., ik) to a memory location.
|
||||||
|
///
|
||||||
|
/// "Tiled layout" indicates a mapping to contiguously stored
|
||||||
|
/// <tt>ArgN0</tt> by <tt>ArgN1</tt> tiles for the rightmost two
|
||||||
|
/// dimensions. Indices are LayoutLeft within each tile, and the
|
||||||
|
/// tiles themselves are arranged using LayoutLeft. Note that the
|
||||||
|
/// dimensions <tt>ArgN0</tt> and <tt>ArgN1</tt> of the tiles must be
|
||||||
|
/// compile-time constants. This speeds up index calculations. If
|
||||||
|
/// both tile dimensions are powers of two, Kokkos can optimize
|
||||||
|
/// further.
|
||||||
|
template < unsigned ArgN0 , unsigned ArgN1 ,
|
||||||
|
bool IsPowerOfTwo = ( Impl::is_power_of_two<ArgN0>::value &&
|
||||||
|
Impl::is_power_of_two<ArgN1>::value )
|
||||||
|
>
|
||||||
|
struct LayoutTileLeft {
|
||||||
|
//! Tag this class as a kokkos array layout
|
||||||
|
typedef LayoutTileLeft<ArgN0,ArgN1,IsPowerOfTwo> array_layout ;
|
||||||
|
|
||||||
|
enum { N0 = ArgN0 };
|
||||||
|
enum { N1 = ArgN1 };
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif // #ifndef KOKKOS_LAYOUT_HPP
|
||||||
|
|
||||||
397
lib/kokkos/core/src/Kokkos_Macros.hpp
Executable file
397
lib/kokkos/core/src/Kokkos_Macros.hpp
Executable file
@ -0,0 +1,397 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_MACROS_HPP
|
||||||
|
#define KOKKOS_MACROS_HPP
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** Pick up configure/build options via #define macros:
|
||||||
|
*
|
||||||
|
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
|
||||||
|
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
|
||||||
|
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
|
||||||
|
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
|
||||||
|
* KOKKOS_HAVE_HWLOC HWLOC library is available
|
||||||
|
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
|
||||||
|
* KOKKOS_HAVE_CXX11 enable C++11 features
|
||||||
|
*
|
||||||
|
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
|
||||||
|
*
|
||||||
|
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Pick up the configure/build option macros from KokkosCore_config.h,
// unless the including code opted out by defining
// KOKKOS_DONT_INCLUDE_CORE_CONFIG_H beforehand.
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** Pick up compiler specific #define macros:
|
||||||
|
*
|
||||||
|
* Macros for known compilers evaluate to an integral version value
|
||||||
|
*
|
||||||
|
* KOKKOS_COMPILER_NVCC
|
||||||
|
* KOKKOS_COMPILER_GNU
|
||||||
|
* KOKKOS_COMPILER_INTEL
|
||||||
|
* KOKKOS_COMPILER_IBM
|
||||||
|
* KOKKOS_COMPILER_CRAYC
|
||||||
|
* KOKKOS_COMPILER_APPLECC
|
||||||
|
* KOKKOS_COMPILER_CLANG
|
||||||
|
* KOKKOS_COMPILER_PGI
|
||||||
|
*
|
||||||
|
* Macros for which compiler extension to use for atomics on intrinsic types
|
||||||
|
*
|
||||||
|
* KOKKOS_ATOMICS_USE_CUDA
|
||||||
|
* KOKKOS_ATOMICS_USE_GNU
|
||||||
|
* KOKKOS_ATOMICS_USE_INTEL
|
||||||
|
* KOKKOS_ATOMICS_USE_OPENMP31
|
||||||
|
*
|
||||||
|
* A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use.
|
||||||
|
*
|
||||||
|
* Macros for marking functions to run in an execution space:
|
||||||
|
*
|
||||||
|
* KOKKOS_FUNCTION
|
||||||
|
* KOKKOS_INLINE_FUNCTION request compiler to inline
|
||||||
|
* KOKKOS_FORCEINLINE_FUNCTION force compiler to inline, use with care!
|
||||||
|
*/
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )

/*  Compiling with a CUDA compiler.
 *
 *  Include <cuda.h> to pick up the CUDA_VERSION macro defined as:
 *    CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 )
 *
 *  When generating device code the __CUDA_ARCH__ macro is defined as:
 *    __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 )
 */

#include <cuda_runtime.h>
#include <cuda.h>

// Fail early with a readable message if the toolkit header did not
// provide the version macro that the checks below rely on.
#if ! defined( CUDA_VERSION )
#error "#include <cuda.h> did not define CUDA_VERSION"
#endif

#if ( CUDA_VERSION < 6050 )
// CUDA supports (unofficially) C++11 in device code starting with
// version 6.5. This includes auto type and device code internal
// lambdas.
#error "Cuda version 6.5 or greater required"
#endif

#if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 )
/*  Compiling with CUDA compiler for device code. */
#error "Cuda device capability >= 3.0 is required"
#endif

#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* Language info: C++, CUDA, OPENMP */
|
||||||
|
|
||||||
|
#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA )
// Compiling Cuda code to 'ptx'

// Function-marking macros for the device compilation pass: every marked
// function is annotated for both host and device.
#define KOKKOS_FORCEINLINE_FUNCTION  __device__  __host__  __forceinline__
#define KOKKOS_INLINE_FUNCTION       __device__  __host__  inline
#define KOKKOS_FUNCTION              __device__  __host__

#endif /* #if defined( __CUDA_ARCH__ ) && defined( KOKKOS_HAVE_CUDA ) */

#if defined( _OPENMP )

/*  Compiling with OpenMP.
 *  The value of _OPENMP is an integer value YYYYMM
 *  where YYYY and MM are the year and month designation
 *  of the supported OpenMP API version.
 */

#endif /* #if defined( _OPENMP ) */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* Mapping compiler built-ins to KOKKOS_COMPILER_*** macros */
|
||||||
|
|
||||||
|
#if defined( __NVCC__ )
// NVIDIA compiler is being used.
// Code is parsed and separated into host and device code.
// Host code is compiled again with another compiler.
// Device code is compiled to 'ptx'.
#define KOKKOS_COMPILER_NVCC __NVCC__

#else
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
// CUDA (including version 6.5) does not support giving lambdas as
// arguments to global functions. Thus it's not currently possible
// to dispatch lambdas from the host.
// Lambda dispatch is therefore only switched on when NVCC is not in use.
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif /* #if defined( __NVCC__ ) */

// Default capture clause used to introduce a Kokkos lambda (capture by copy);
// a definition supplied earlier takes precedence.
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
#define KOKKOS_LAMBDA [=]
#endif
|
||||||
|
|
||||||
|
#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */

/* Intel compiler for host code */

#if defined( __INTEL_COMPILER )
#define KOKKOS_COMPILER_INTEL __INTEL_COMPILER
#elif defined( __ICC )
// Old define
#define KOKKOS_COMPILER_INTEL __ICC
#elif defined( __ECC )
// Very old define
#define KOKKOS_COMPILER_INTEL __ECC
#endif

/* CRAY compiler for host code */
#if defined( _CRAYC )
#define KOKKOS_COMPILER_CRAYC _CRAYC
#endif

#if defined( __IBMCPP__ )
// IBM C++
#define KOKKOS_COMPILER_IBM __IBMCPP__
#elif defined( __IBMC__ )
#define KOKKOS_COMPILER_IBM __IBMC__
#endif

#if defined( __APPLE_CC__ )
#define KOKKOS_COMPILER_APPLECC __APPLE_CC__
#endif

// The composite version numbers below are computed as
//   major * 100 + minor * 10 + patchlevel
// and are fully parenthesized so that they expand as a single value when
// used inside a larger expression (e.g. 'X * 2' or '! X').

#if defined (__clang__) && !defined (KOKKOS_COMPILER_INTEL)
#define KOKKOS_COMPILER_CLANG ( __clang_major__*100+__clang_minor__*10+__clang_patchlevel__ )
#endif

// Clang and the Intel compiler may also define __GNUC__, so they are
// excluded before the compiler is classified as GNU.
#if ! defined( __clang__ ) && ! defined( KOKKOS_COMPILER_INTEL ) &&defined( __GNUC__ )
#define KOKKOS_COMPILER_GNU ( __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__ )
#endif

#if defined( __PGIC__ ) && ! defined( __GNUC__ )
#define KOKKOS_COMPILER_PGI ( __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ )
#endif

#endif /* #if ! defined( __CUDA_ARCH__ ) */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* Intel compiler macros */
|
||||||
|
|
||||||
|
#if defined( KOKKOS_COMPILER_INTEL )

// Loop pragmas enabled for the Intel compiler.
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
#define KOKKOS_HAVE_PRAGMA_SIMD 1

// Inline assembly is enabled for version 1200 and later, except on Windows.
#if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 )
#define KOKKOS_ENABLE_ASM 1
#endif

// Forced inlining uses the GNU-style attribute, which is skipped on Windows.
#if ( 1200 <= KOKKOS_COMPILER_INTEL ) && ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#if !defined (_WIN32)
#define KOKKOS_FORCEINLINE_FUNCTION  inline __attribute__((always_inline))
#else
#define KOKKOS_FORCEINLINE_FUNCTION inline
#endif
#endif

#if defined( __MIC__ )
// Compiling for Xeon Phi
#endif

#endif /* #if defined( KOKKOS_COMPILER_INTEL ) */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* Cray compiler macros */
|
||||||
|
|
||||||
|
// No Cray-specific macros are defined; the block is kept as a placeholder.
#if defined( KOKKOS_COMPILER_CRAYC )

#endif /* #if defined( KOKKOS_COMPILER_CRAYC ) */

/*--------------------------------------------------------------------------*/
/* IBM Compiler macros */

#if defined( KOKKOS_COMPILER_IBM )

// Only the unroll pragma is enabled for the IBM compiler.
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1

#endif /* #if defined( KOKKOS_COMPILER_IBM ) */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* CLANG compiler macros */
|
||||||
|
|
||||||
|
#if defined( KOKKOS_COMPILER_CLANG )

// No loop pragmas are enabled for Clang.
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1

// Forced inlining via the always_inline attribute, unless already defined.
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION  inline __attribute__((always_inline))
#endif

#endif /* #if defined( KOKKOS_COMPILER_CLANG ) */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/* GNU Compiler macros */
|
||||||
|
|
||||||
|
#if defined( KOKKOS_COMPILER_GNU )

// No loop pragmas are enabled for the GNU compiler.
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1

// Forced inlining via the always_inline attribute, unless already defined.
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION  inline __attribute__((always_inline))
#endif

// Inline assembly is enabled unless the target is a PowerPC variant or the
// PGI compiler is in use.
#if ! defined( KOKKOS_ENABLE_ASM ) && \
    ! ( defined( __powerpc) || \
        defined(__powerpc__) || \
        defined(__powerpc64__) || \
        defined(__POWERPC__) || \
        defined(__ppc__) || \
        defined(__ppc64__) || \
        defined(__PGIC__) )
#define KOKKOS_ENABLE_ASM 1
#endif

#endif /* #if defined( KOKKOS_COMPILER_GNU ) */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#if defined( KOKKOS_COMPILER_PGI )

// Loop pragmas enabled for the PGI compiler: unroll, ivdep and vector.
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1

#endif /* #if defined( KOKKOS_COMPILER_PGI ) */

/*--------------------------------------------------------------------------*/

#if defined( KOKKOS_COMPILER_NVCC )

// The unroll pragma is only enabled while compiling device code.
#if defined(__CUDA_ARCH__ )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#endif

#endif /* #if defined( KOKKOS_COMPILER_NVCC ) */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** Define function marking macros if compiler specific macros are undefined: */
|
||||||
|
|
||||||
|
// Fallback definitions: each macro is defined only when no earlier,
// compiler-specific section has already provided it.

#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION  inline
#endif

#if ! defined( KOKKOS_INLINE_FUNCTION )
#define KOKKOS_INLINE_FUNCTION  inline
#endif

// Expands to nothing (an empty comment) when no marking is required.
#if ! defined( KOKKOS_FUNCTION )
#define KOKKOS_FUNCTION /**/
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** Determine the default execution space for parallel dispatch.
|
||||||
|
* There is zero or one default execution space specified.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Reject configurations that request more than one default execution space:
// each defined KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* macro contributes 1 to the sum.
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
          ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
          ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
          ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )

#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified"

#endif

/** If default is not specified then choose from enabled execution spaces.
 *  Priority: CUDA, OPENMP, THREADS, SERIAL
 */
// The first four branches are intentionally empty: an explicitly specified
// default is left untouched.
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_HAVE_CUDA )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_HAVE_OPENMP )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_HAVE_PTHREAD )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#else
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** Determine for what space the code is being compiled: */
|
||||||
|
|
||||||
|
// Exactly one of the two macros is defined: the CUDA variant while the CUDA
// compiler generates device code with Kokkos CUDA support enabled, the host
// variant in every other case.
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#else
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_MACROS_HPP */
|
||||||
|
|
||||||
116
lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
Executable file
116
lib/kokkos/core/src/Kokkos_MemoryTraits.hpp
Executable file
@ -0,0 +1,116 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_MEMORYTRAITS_HPP
|
||||||
|
#define KOKKOS_MEMORYTRAITS_HPP
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {

/** \brief  Memory access traits for views, an extension point.
 *
 *  These traits should be orthogonal.  If there are dependencies then
 *  the MemoryTraits template must detect and enforce dependencies.
 *
 *  A zero value is the default for a View, indicating that none of
 *  these traits are present.
 *
 *  Each flag occupies its own bit so that flags can be combined
 *  with bitwise-or.
 */
enum MemoryTraitsFlags
  { Unmanaged    = 0x01
  , RandomAccess = 0x02
  , Atomic       = 0x04
  };

/** \brief  Decodes a bitwise-or of MemoryTraitsFlags.
 *
 *  \tparam T  Combination of MemoryTraitsFlags values.
 *
 *  Each member constant is non-zero (equal to the flag's bit)
 *  when the corresponding flag is set in T and zero otherwise.
 */
template < unsigned T >
struct MemoryTraits {
  //! Tag this class as a kokkos memory traits:
  typedef MemoryTraits memory_traits ;

  // The flags are qualified with Kokkos:: because the member constants
  // declared here use the same names.
  enum { Unmanaged    = T & unsigned(Kokkos::Unmanaged) };
  enum { RandomAccess = T & unsigned(Kokkos::RandomAccess) };
  enum { Atomic       = T & unsigned(Kokkos::Atomic) };

};

} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
typedef Kokkos::MemoryTraits<0> MemoryManaged ;
|
||||||
|
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged > MemoryUnmanaged ;
|
||||||
|
typedef Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::RandomAccess > MemoryRandomAccess ;
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/** \brief Memory alignment settings
|
||||||
|
*
|
||||||
|
* Sets global value for memory alignment. Must be a power of two!
|
||||||
|
* Enable compatibility of views from different devices with static stride.
|
||||||
|
* Use compiler flag to enable overwrites.
|
||||||
|
*/
|
||||||
|
enum { MEMORY_ALIGNMENT =
|
||||||
|
#if defined( KOKKOS_MEMORY_ALIGNMENT )
|
||||||
|
( 1 << Kokkos::Impl::power_of_two< KOKKOS_MEMORY_ALIGNMENT >::value )
|
||||||
|
#else
|
||||||
|
( 1 << Kokkos::Impl::power_of_two< 128 >::value )
|
||||||
|
#endif
|
||||||
|
, MEMORY_ALIGNMENT_THRESHOLD = 4
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} //namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_MEMORYTRAITS_HPP */
|
||||||
|
|
||||||
175
lib/kokkos/core/src/Kokkos_OpenMP.hpp
Executable file
175
lib/kokkos/core/src/Kokkos_OpenMP.hpp
Executable file
@ -0,0 +1,175 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_OPENMP_HPP
|
||||||
|
#define KOKKOS_OPENMP_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <Kokkos_HostSpace.hpp>
|
||||||
|
#include <Kokkos_ScratchSpace.hpp>
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <Kokkos_Layout.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/// \class OpenMP
|
||||||
|
/// \brief Kokkos device for multicore processors in the host memory space.
|
||||||
|
class OpenMP {
|
||||||
|
public:
|
||||||
|
//------------------------------------
|
||||||
|
//! \name Type declarations that all Kokkos devices must provide.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos execution space
|
||||||
|
typedef OpenMP execution_space ;
|
||||||
|
typedef HostSpace memory_space ;
|
||||||
|
//! This execution space preferred device_type
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
typedef LayoutRight array_layout ;
|
||||||
|
typedef HostSpace::size_type size_type ;
|
||||||
|
|
||||||
|
typedef ScratchMemorySpace< OpenMP > scratch_memory_space ;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//------------------------------------
|
||||||
|
//! \name Functions that all Kokkos devices must implement.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
inline static bool in_parallel() { return omp_in_parallel(); }
|
||||||
|
|
||||||
|
/** \brief Set the device in a "sleep" state. A noop for OpenMP. */
|
||||||
|
static bool sleep();
|
||||||
|
|
||||||
|
/** \brief Wake the device from the 'sleep' state. A noop for OpenMP. */
|
||||||
|
static bool wake();
|
||||||
|
|
||||||
|
/** \brief Wait until all dispatched functors complete. A noop for OpenMP. */
|
||||||
|
static void fence() {}
|
||||||
|
|
||||||
|
/// \brief Print configuration information to the given output stream.
|
||||||
|
static void print_configuration( std::ostream & , const bool detail = false );
|
||||||
|
|
||||||
|
/// \brief Free any resources being consumed by the device.
|
||||||
|
static void finalize();
|
||||||
|
|
||||||
|
/** \brief Initialize the device.
|
||||||
|
*
|
||||||
|
* 1) If the hardware locality library is enabled and OpenMP has not
|
||||||
|
* already bound threads then bind OpenMP threads to maximize
|
||||||
|
* core utilization and group for memory hierarchy locality.
|
||||||
|
*
|
||||||
|
* 2) Allocate a HostThread for each OpenMP thread to hold its
|
||||||
|
* topology and fan in/out data.
|
||||||
|
*/
|
||||||
|
static void initialize( unsigned thread_count = 0 ,
|
||||||
|
unsigned use_numa_count = 0 ,
|
||||||
|
unsigned use_cores_per_numa = 0 );
|
||||||
|
|
||||||
|
static int is_initialized();
|
||||||
|
//@}
|
||||||
|
//------------------------------------
|
||||||
|
/** \brief This execution space has a topological thread pool which can be queried.
|
||||||
|
*
|
||||||
|
* All threads within a pool have a common memory space for which they are cache coherent.
|
||||||
|
* depth = 0 gives the number of threads in the whole pool.
|
||||||
|
* depth = 1 gives the number of threads in a NUMA region, typically sharing L3 cache.
|
||||||
|
* depth = 2 gives the number of threads at the finest granularity, typically sharing L1 cache.
|
||||||
|
*/
|
||||||
|
inline static int thread_pool_size( int depth = 0 );
|
||||||
|
|
||||||
|
/** \brief The rank of the executing thread in this thread pool */
|
||||||
|
KOKKOS_INLINE_FUNCTION static int thread_pool_rank();
|
||||||
|
|
||||||
|
//------------------------------------
|
||||||
|
|
||||||
|
inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static
|
||||||
|
unsigned hardware_thread_id() { return thread_pool_rank(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace
|
||||||
|
< Kokkos::OpenMP::memory_space
|
||||||
|
, Kokkos::OpenMP::scratch_memory_space
|
||||||
|
>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
inline static void verify( void ) { }
|
||||||
|
inline static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#include <OpenMP/Kokkos_OpenMPexec.hpp>
|
||||||
|
#include <OpenMP/Kokkos_OpenMP_Parallel.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
|
||||||
|
#endif /* #ifndef KOKKOS_OPENMP_HPP */
|
||||||
|
|
||||||
|
|
||||||
498
lib/kokkos/core/src/Kokkos_Pair.hpp
Executable file
498
lib/kokkos/core/src/Kokkos_Pair.hpp
Executable file
@ -0,0 +1,498 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
/// \file Kokkos_Pair.hpp
|
||||||
|
/// \brief Declaration and definition of Kokkos::pair.
|
||||||
|
///
|
||||||
|
/// This header file declares and defines Kokkos::pair and its related
|
||||||
|
/// nonmember functions.
|
||||||
|
|
||||||
|
#ifndef KOKKOS_PAIR_HPP
|
||||||
|
#define KOKKOS_PAIR_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Macros.hpp>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
/// \struct pair
|
||||||
|
/// \brief Replacement for std::pair that works on CUDA devices.
|
||||||
|
///
|
||||||
|
/// The instance methods of std::pair, including its constructors, are
|
||||||
|
/// not marked as <tt>__device__</tt> functions. Thus, they cannot be
|
||||||
|
/// called on a CUDA device, such as an NVIDIA GPU. This struct
|
||||||
|
/// implements the same interface as std::pair, but can be used on a
|
||||||
|
/// CUDA device as well as on the host.
|
||||||
|
template <class T1, class T2>
|
||||||
|
struct pair
|
||||||
|
{
|
||||||
|
//! The first template parameter of this class.
|
||||||
|
typedef T1 first_type;
|
||||||
|
//! The second template parameter of this class.
|
||||||
|
typedef T2 second_type;
|
||||||
|
|
||||||
|
//! The first element of the pair.
|
||||||
|
first_type first;
|
||||||
|
//! The second element of the pair.
|
||||||
|
second_type second;
|
||||||
|
|
||||||
|
/// \brief Default constructor.
|
||||||
|
///
|
||||||
|
/// This calls the default constructors of T1 and T2. It won't
|
||||||
|
/// compile if those default constructors are not defined and
|
||||||
|
/// public.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair()
|
||||||
|
: first(), second()
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Constructor that takes both elements of the pair.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair(first_type const& f, second_type const& s)
|
||||||
|
: first(f), second(s)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Copy constructor.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair( const pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Assignment operator.
|
||||||
|
///
|
||||||
|
/// This calls the assignment operators of T1 and T2. It won't
|
||||||
|
/// compile if the assignment operators are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair<T1, T2> & operator=(const pair<U,V> &p)
|
||||||
|
{
|
||||||
|
first = p.first;
|
||||||
|
second = p.second;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
// from std::pair<U,V>
|
||||||
|
template <class U, class V>
|
||||||
|
pair( const std::pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Return the std::pair version of this object.
|
||||||
|
///
|
||||||
|
/// This is <i>not</i> a device function; you may not call it on a
|
||||||
|
/// CUDA device. It is meant to be called on the host, if the user
|
||||||
|
/// wants an std::pair instead of a Kokkos::pair.
|
||||||
|
///
|
||||||
|
/// \note This is not a conversion operator, since defining a
|
||||||
|
/// conversion operator made the relational operators have
|
||||||
|
/// ambiguous definitions.
|
||||||
|
std::pair<T1,T2> to_std_pair() const
|
||||||
|
{ return std::make_pair(first,second); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T1, class T2>
|
||||||
|
struct pair<T1&, T2&>
|
||||||
|
{
|
||||||
|
//! The first template parameter of this class.
|
||||||
|
typedef T1& first_type;
|
||||||
|
//! The second template parameter of this class.
|
||||||
|
typedef T2& second_type;
|
||||||
|
|
||||||
|
//! The first element of the pair.
|
||||||
|
first_type first;
|
||||||
|
//! The second element of the pair.
|
||||||
|
second_type second;
|
||||||
|
|
||||||
|
/// \brief Constructor that takes both elements of the pair.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair(first_type f, second_type s)
|
||||||
|
: first(f), second(s)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Copy constructor.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair( const pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
// from std::pair<U,V>
|
||||||
|
template <class U, class V>
|
||||||
|
pair( const std::pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Assignment operator.
|
||||||
|
///
|
||||||
|
/// This calls the assignment operators of T1 and T2. It won't
|
||||||
|
/// compile if the assignment operators are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair<first_type, second_type> & operator=(const pair<U,V> &p)
|
||||||
|
{
|
||||||
|
first = p.first;
|
||||||
|
second = p.second;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Return the std::pair version of this object.
|
||||||
|
///
|
||||||
|
/// This is <i>not</i> a device function; you may not call it on a
|
||||||
|
/// CUDA device. It is meant to be called on the host, if the user
|
||||||
|
/// wants an std::pair instead of a Kokkos::pair.
|
||||||
|
///
|
||||||
|
/// \note This is not a conversion operator, since defining a
|
||||||
|
/// conversion operator made the relational operators have
|
||||||
|
/// ambiguous definitions.
|
||||||
|
std::pair<T1,T2> to_std_pair() const
|
||||||
|
{ return std::make_pair(first,second); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T1, class T2>
|
||||||
|
struct pair<T1, T2&>
|
||||||
|
{
|
||||||
|
//! The first template parameter of this class.
|
||||||
|
typedef T1 first_type;
|
||||||
|
//! The second template parameter of this class.
|
||||||
|
typedef T2& second_type;
|
||||||
|
|
||||||
|
//! The first element of the pair.
|
||||||
|
first_type first;
|
||||||
|
//! The second element of the pair.
|
||||||
|
second_type second;
|
||||||
|
|
||||||
|
/// \brief Constructor that takes both elements of the pair.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair(first_type const& f, second_type s)
|
||||||
|
: first(f), second(s)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Copy constructor.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair( const pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
// from std::pair<U,V>
|
||||||
|
template <class U, class V>
|
||||||
|
pair( const std::pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Assignment operator.
|
||||||
|
///
|
||||||
|
/// This calls the assignment operators of T1 and T2. It won't
|
||||||
|
/// compile if the assignment operators are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair<first_type, second_type> & operator=(const pair<U,V> &p)
|
||||||
|
{
|
||||||
|
first = p.first;
|
||||||
|
second = p.second;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Return the std::pair version of this object.
|
||||||
|
///
|
||||||
|
/// This is <i>not</i> a device function; you may not call it on a
|
||||||
|
/// CUDA device. It is meant to be called on the host, if the user
|
||||||
|
/// wants an std::pair instead of a Kokkos::pair.
|
||||||
|
///
|
||||||
|
/// \note This is not a conversion operator, since defining a
|
||||||
|
/// conversion operator made the relational operators have
|
||||||
|
/// ambiguous definitions.
|
||||||
|
std::pair<T1,T2> to_std_pair() const
|
||||||
|
{ return std::make_pair(first,second); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T1, class T2>
|
||||||
|
struct pair<T1&, T2>
|
||||||
|
{
|
||||||
|
//! The first template parameter of this class.
|
||||||
|
typedef T1& first_type;
|
||||||
|
//! The second template parameter of this class.
|
||||||
|
typedef T2 second_type;
|
||||||
|
|
||||||
|
//! The first element of the pair.
|
||||||
|
first_type first;
|
||||||
|
//! The second element of the pair.
|
||||||
|
second_type second;
|
||||||
|
|
||||||
|
/// \brief Constructor that takes both elements of the pair.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair(first_type f, second_type const& s)
|
||||||
|
: first(f), second(s)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Copy constructor.
|
||||||
|
///
|
||||||
|
/// This calls the copy constructors of T1 and T2. It won't compile
|
||||||
|
/// if those copy constructors are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair( const pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
// from std::pair<U,V>
|
||||||
|
template <class U, class V>
|
||||||
|
pair( const std::pair<U,V> &p)
|
||||||
|
: first(p.first), second(p.second)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/// \brief Assignment operator.
|
||||||
|
///
|
||||||
|
/// This calls the assignment operators of T1 and T2. It won't
|
||||||
|
/// compile if the assignment operators are not defined and public.
|
||||||
|
template <class U, class V>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair<first_type, second_type> & operator=(const pair<U,V> &p)
|
||||||
|
{
|
||||||
|
first = p.first;
|
||||||
|
second = p.second;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Return the std::pair version of this object.
|
||||||
|
///
|
||||||
|
/// This is <i>not</i> a device function; you may not call it on a
|
||||||
|
/// CUDA device. It is meant to be called on the host, if the user
|
||||||
|
/// wants an std::pair instead of a Kokkos::pair.
|
||||||
|
///
|
||||||
|
/// \note This is not a conversion operator, since defining a
|
||||||
|
/// conversion operator made the relational operators have
|
||||||
|
/// ambiguous definitions.
|
||||||
|
std::pair<T1,T2> to_std_pair() const
|
||||||
|
{ return std::make_pair(first,second); }
|
||||||
|
};
|
||||||
|
|
||||||
|
//! Equality operator for Kokkos::pair.
|
||||||
|
template <class T1, class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator== (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs)
|
||||||
|
{ return lhs.first==rhs.first && lhs.second==rhs.second; }
|
||||||
|
|
||||||
|
//! Inequality operator for Kokkos::pair.
|
||||||
|
template <class T1, class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator!= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs)
|
||||||
|
{ return !(lhs==rhs); }
|
||||||
|
|
||||||
|
//! Less-than operator for Kokkos::pair.
|
||||||
|
template <class T1, class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator< (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs)
|
||||||
|
{ return lhs.first<rhs.first || (!(rhs.first<lhs.first) && lhs.second<rhs.second); }
|
||||||
|
|
||||||
|
//! Less-than-or-equal-to operator for Kokkos::pair.
|
||||||
|
template <class T1, class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator<= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs)
|
||||||
|
{ return !(rhs<lhs); }
|
||||||
|
|
||||||
|
//! Greater-than operator for Kokkos::pair.
|
||||||
|
template <class T1, class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator> (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs)
|
||||||
|
{ return rhs<lhs; }
|
||||||
|
|
||||||
|
//! Greater-than-or-equal-to operator for Kokkos::pair.
|
||||||
|
template <class T1, class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator>= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs)
|
||||||
|
{ return !(lhs<rhs); }
|
||||||
|
|
||||||
|
/// \brief Return a new pair.
|
||||||
|
///
|
||||||
|
/// This is a "nonmember constructor" for Kokkos::pair. It works just
|
||||||
|
/// like std::make_pair.
|
||||||
|
template <class T1,class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair<T1,T2> make_pair (T1 x, T2 y)
|
||||||
|
{ return ( pair<T1,T2>(x,y) ); }
|
||||||
|
|
||||||
|
/// \brief Return a pair of references to the input arguments.
|
||||||
|
///
|
||||||
|
/// This compares to std::tie (new in C++11). You can use it to
|
||||||
|
/// assign to two variables at once, from the result of a function
|
||||||
|
/// that returns a pair. For example (<tt>__device__</tt> and
|
||||||
|
/// <tt>__host__</tt> attributes omitted for brevity):
|
||||||
|
/// \code
|
||||||
|
/// // Declaration of the function to call.
|
||||||
|
/// // First return value: operation count.
|
||||||
|
/// // Second return value: whether all operations succeeded.
|
||||||
|
/// Kokkos::pair<int, bool> someFunction ();
|
||||||
|
///
|
||||||
|
/// // Code that uses Kokkos::tie.
|
||||||
|
/// int myFunction () {
|
||||||
|
/// int count = 0;
|
||||||
|
/// bool success = false;
|
||||||
|
///
|
||||||
|
/// // This assigns to both count and success.
|
||||||
|
/// Kokkos::tie (count, success) = someFunction ();
|
||||||
|
///
|
||||||
|
/// if (! success) {
|
||||||
|
/// // ... Some operation failed;
|
||||||
|
/// // take corrective action ...
|
||||||
|
/// }
|
||||||
|
/// return count;
|
||||||
|
/// }
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
/// The line that uses tie() could have been written like this:
|
||||||
|
/// \code
|
||||||
|
/// Kokkos::pair<int, bool> result = someFunction ();
|
||||||
|
/// count = result.first;
|
||||||
|
/// success = result.second;
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
/// Using tie() saves two lines of code and avoids a copy of each
|
||||||
|
/// element of the pair. The latter could be significant if one or
|
||||||
|
/// both elements of the pair are more substantial objects than \c int
|
||||||
|
/// or \c bool.
|
||||||
|
template <class T1,class T2>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair<T1 &,T2 &> tie (T1 & x, T2 & y)
|
||||||
|
{ return ( pair<T1 &,T2 &>(x,y) ); }
|
||||||
|
|
||||||
|
//
|
||||||
|
// Specialization of Kokkos::pair for a \c void second argument. This
|
||||||
|
// is not actually a "pair"; it only contains one element, the first.
|
||||||
|
//
|
||||||
|
template <class T1>
|
||||||
|
struct pair<T1,void>
|
||||||
|
{
|
||||||
|
typedef T1 first_type;
|
||||||
|
typedef void second_type;
|
||||||
|
|
||||||
|
first_type first;
|
||||||
|
enum { second = 0 };
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair()
|
||||||
|
: first()
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair(const first_type & f)
|
||||||
|
: first(f)
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair(const first_type & f, int)
|
||||||
|
: first(f)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template <class U>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair( const pair<U,void> &p)
|
||||||
|
: first(p.first)
|
||||||
|
{}
|
||||||
|
|
||||||
|
template <class U>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
pair<T1, void> & operator=(const pair<U,void> &p)
|
||||||
|
{
|
||||||
|
first = p.first;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// Overloads of the relational operators for Kokkos::pair<T1,void>.
|
||||||
|
//
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator== (const pair<T1,void>& lhs, const pair<T1,void>& rhs)
|
||||||
|
{ return lhs.first==rhs.first; }
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator!= (const pair<T1,void>& lhs, const pair<T1,void>& rhs)
|
||||||
|
{ return !(lhs==rhs); }
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator< (const pair<T1,void>& lhs, const pair<T1,void>& rhs)
|
||||||
|
{ return lhs.first<rhs.first; }
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator<= (const pair<T1,void>& lhs, const pair<T1,void>& rhs)
|
||||||
|
{ return !(rhs<lhs); }
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator> (const pair<T1,void>& lhs, const pair<T1,void>& rhs)
|
||||||
|
{ return rhs<lhs; }
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
bool operator>= (const pair<T1,void>& lhs, const pair<T1,void>& rhs)
|
||||||
|
{ return !(lhs<rhs); }
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
#endif //KOKKOS_PAIR_HPP
|
||||||
908
lib/kokkos/core/src/Kokkos_Parallel.hpp
Executable file
908
lib/kokkos/core/src/Kokkos_Parallel.hpp
Executable file
@ -0,0 +1,908 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// \file Kokkos_Parallel.hpp
|
||||||
|
/// \brief Declaration of parallel operators
|
||||||
|
|
||||||
|
#ifndef KOKKOS_PARALLEL_HPP
|
||||||
|
#define KOKKOS_PARALLEL_HPP
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
#include <Kokkos_View.hpp>
|
||||||
|
#include <Kokkos_ExecPolicy.hpp>
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||||
|
#include <typeinfo>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_DEBUG
|
||||||
|
#include<iostream>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** \brief Given a Functor and Execution Policy query an execution space.
|
||||||
|
*
|
||||||
|
* if the Policy has an execution space use that
|
||||||
|
* else if the Functor has an execution_space use that
|
||||||
|
* else if the Functor has a device_type use that for backward compatibility
|
||||||
|
* else use the default
|
||||||
|
*/
|
||||||
|
template< class Functor
|
||||||
|
, class Policy
|
||||||
|
, class EnableFunctor = void
|
||||||
|
, class EnablePolicy = void
|
||||||
|
>
|
||||||
|
struct FunctorPolicyExecutionSpace {
|
||||||
|
typedef Kokkos::DefaultExecutionSpace execution_space ;
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class Functor , class Policy >
|
||||||
|
struct FunctorPolicyExecutionSpace
|
||||||
|
< Functor , Policy
|
||||||
|
, typename enable_if_type< typename Functor::device_type >::type
|
||||||
|
, typename enable_if_type< typename Policy ::execution_space >::type
|
||||||
|
>
|
||||||
|
{
|
||||||
|
typedef typename Policy ::execution_space execution_space ;
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class Functor , class Policy >
|
||||||
|
struct FunctorPolicyExecutionSpace
|
||||||
|
< Functor , Policy
|
||||||
|
, typename enable_if_type< typename Functor::execution_space >::type
|
||||||
|
, typename enable_if_type< typename Policy ::execution_space >::type
|
||||||
|
>
|
||||||
|
{
|
||||||
|
typedef typename Policy ::execution_space execution_space ;
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class Functor , class Policy , class EnableFunctor >
|
||||||
|
struct FunctorPolicyExecutionSpace
|
||||||
|
< Functor , Policy
|
||||||
|
, EnableFunctor
|
||||||
|
, typename enable_if_type< typename Policy::execution_space >::type
|
||||||
|
>
|
||||||
|
{
|
||||||
|
typedef typename Policy ::execution_space execution_space ;
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class Functor , class Policy , class EnablePolicy >
|
||||||
|
struct FunctorPolicyExecutionSpace
|
||||||
|
< Functor , Policy
|
||||||
|
, typename enable_if_type< typename Functor::device_type >::type
|
||||||
|
, EnablePolicy
|
||||||
|
>
|
||||||
|
{
|
||||||
|
typedef typename Functor::device_type execution_space ;
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class Functor , class Policy , class EnablePolicy >
|
||||||
|
struct FunctorPolicyExecutionSpace
|
||||||
|
< Functor , Policy
|
||||||
|
, typename enable_if_type< typename Functor::execution_space >::type
|
||||||
|
, EnablePolicy
|
||||||
|
>
|
||||||
|
{
|
||||||
|
typedef typename Functor::execution_space execution_space ;
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/// \class ParallelFor
/// \brief Implementation of the ParallelFor operator; it has a
///   partial specialization for the device.
///
/// Implementation detail of parallel_for.  Users should not use this
/// class directly; call the nonmember function parallel_for instead.
template< typename FunctorType , typename ExecPolicy >
class ParallelFor ;
|
||||||
|
|
||||||
|
/// \class ParallelReduce
/// \brief Implementation detail of parallel_reduce.
///
/// Users should not use this class directly; call the nonmember
/// function parallel_reduce instead.
template< typename FunctorType , typename ExecPolicy >
class ParallelReduce ;
|
||||||
|
|
||||||
|
/// \class ParallelScan
/// \brief Implementation detail of parallel_scan.
///
/// Users should not use this class directly; see the documentation of
/// the nonmember template function Kokkos::parallel_scan instead.
template< typename FunctorType , typename ExecPolicy >
class ParallelScan ;
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Execute \c functor in parallel according to the execution \c policy.
|
||||||
|
*
|
||||||
|
* A "functor" is a class containing the function to execute in parallel,
|
||||||
|
* data needed for that execution, and an optional \c execution_space
|
||||||
|
* typedef. Here is an example functor for parallel_for:
|
||||||
|
*
|
||||||
|
* \code
|
||||||
|
* class FunctorType {
|
||||||
|
* public:
|
||||||
|
* typedef ... execution_space ;
|
||||||
|
* void operator() ( WorkType iwork ) const ;
|
||||||
|
* };
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* In the above example, \c WorkType is any integer type for which a
|
||||||
|
* valid conversion from \c size_t to \c WorkType exists. Its
|
||||||
|
* <tt>operator()</tt> method defines the operation to parallelize,
|
||||||
|
* over the range of integer indices <tt>iwork=[0,work_count-1]</tt>.
|
||||||
|
* This compares to a single iteration \c iwork of a \c for loop.
|
||||||
|
* If \c execution_space is not defined DefaultExecutionSpace will be used.
|
||||||
|
*/
|
||||||
|
template< class ExecPolicy , class FunctorType >
|
||||||
|
inline
|
||||||
|
void parallel_for( const ExecPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const std::string& str = ""
|
||||||
|
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
|
||||||
|
)
|
||||||
|
{
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelFor< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelFor(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
inline
|
||||||
|
void parallel_for( const size_t work_count
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const std::string& str = ""
|
||||||
|
)
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
|
||||||
|
execution_space ;
|
||||||
|
typedef RangePolicy< execution_space > policy ;
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelFor< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelFor(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Overload taking the kernel label first.  Forwards to the
// (policy, functor, str) overload; when
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is enabled it also fences and
// prints the label before and after the kernel.
template< typename ExecPolicy , typename FunctorType >
inline
void parallel_for( const std::string & str
                 , const ExecPolicy  & policy
                 , const FunctorType & functor )
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
#endif

  parallel_for( policy , functor , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
#endif

  (void) str;
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** \brief Parallel reduction
|
||||||
|
*
|
||||||
|
* Example of a parallel_reduce functor for a POD (plain old data) value type:
|
||||||
|
* \code
|
||||||
|
* class FunctorType { // For POD value type
|
||||||
|
* public:
|
||||||
|
* typedef ... execution_space ;
|
||||||
|
* typedef <podType> value_type ;
|
||||||
|
* void operator()( <intType> iwork , <podType> & update ) const ;
|
||||||
|
* void init( <podType> & update ) const ;
|
||||||
|
* void join( volatile <podType> & update ,
|
||||||
|
* volatile const <podType> & input ) const ;
|
||||||
|
*
|
||||||
|
* typedef true_type has_final ;
|
||||||
|
* void final( <podType> & update ) const ;
|
||||||
|
* };
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* Example of a parallel_reduce functor for an array of POD (plain old data) values:
|
||||||
|
* \code
|
||||||
|
* class FunctorType { // For array of POD value
|
||||||
|
* public:
|
||||||
|
* typedef ... execution_space ;
|
||||||
|
* typedef <podType> value_type[] ;
|
||||||
|
* void operator()( <intType> , <podType> update[] ) const ;
|
||||||
|
* void init( <podType> update[] ) const ;
|
||||||
|
* void join( volatile <podType> update[] ,
|
||||||
|
* volatile const <podType> input[] ) const ;
|
||||||
|
*
|
||||||
|
* typedef true_type has_final ;
|
||||||
|
* void final( <podType> update[] ) const ;
|
||||||
|
* };
|
||||||
|
* \endcode
|
||||||
|
*/
|
||||||
|
template< class ExecPolicy , class FunctorType >
|
||||||
|
inline
|
||||||
|
void parallel_reduce( const ExecPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const std::string& str = ""
|
||||||
|
, typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
|
||||||
|
)
|
||||||
|
{
|
||||||
|
// typedef typename
|
||||||
|
// Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space
|
||||||
|
// execution_space ;
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
|
||||||
|
|
||||||
|
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
|
||||||
|
, typename ValueTraits::value_type
|
||||||
|
, typename ValueTraits::pointer_type
|
||||||
|
>::type value_type ;
|
||||||
|
|
||||||
|
Kokkos::View< value_type
|
||||||
|
, HostSpace
|
||||||
|
, Kokkos::MemoryUnmanaged
|
||||||
|
>
|
||||||
|
result_view ;
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelReduce< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , result_view );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelReduce(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// integral range policy
|
||||||
|
template< class FunctorType >
|
||||||
|
inline
|
||||||
|
void parallel_reduce( const size_t work_count
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const std::string& str = ""
|
||||||
|
)
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
|
||||||
|
execution_space ;
|
||||||
|
|
||||||
|
typedef RangePolicy< execution_space > policy ;
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
|
||||||
|
|
||||||
|
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
|
||||||
|
, typename ValueTraits::value_type
|
||||||
|
, typename ValueTraits::pointer_type
|
||||||
|
>::type value_type ;
|
||||||
|
|
||||||
|
Kokkos::View< value_type
|
||||||
|
, HostSpace
|
||||||
|
, Kokkos::MemoryUnmanaged
|
||||||
|
>
|
||||||
|
result_view ;
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , result_view );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelReduce(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// general policy and view output
|
||||||
|
template< class ExecPolicy , class FunctorType , class ViewType >
|
||||||
|
inline
|
||||||
|
void parallel_reduce( const ExecPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const ViewType & result_view
|
||||||
|
, const std::string& str = ""
|
||||||
|
, typename Impl::enable_if<
|
||||||
|
( Impl::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
&& ! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value
|
||||||
|
#endif
|
||||||
|
)>::type * = 0 )
|
||||||
|
{
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelReduce(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// general policy and pod or array of pod output
|
||||||
|
template< class ExecPolicy , class FunctorType >
|
||||||
|
void parallel_reduce( const ExecPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
, typename Impl::enable_if<
|
||||||
|
( ! Impl::is_integral< ExecPolicy >::value &&
|
||||||
|
! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value )
|
||||||
|
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type>::type result_ref
|
||||||
|
, const std::string& str = ""
|
||||||
|
, typename Impl::enable_if<! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value >::type* = 0
|
||||||
|
)
|
||||||
|
#else
|
||||||
|
, typename Impl::enable_if<
|
||||||
|
( ! Impl::is_integral< ExecPolicy >::value)
|
||||||
|
, typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type
|
||||||
|
>::type result_ref
|
||||||
|
, const std::string& str = ""
|
||||||
|
)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ;
|
||||||
|
|
||||||
|
// Wrap the result output request in a view to inform the implementation
|
||||||
|
// of the type and memory space.
|
||||||
|
|
||||||
|
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
|
||||||
|
, typename ValueTraits::value_type
|
||||||
|
, typename ValueTraits::pointer_type
|
||||||
|
>::type value_type ;
|
||||||
|
|
||||||
|
Kokkos::View< value_type
|
||||||
|
, HostSpace
|
||||||
|
, Kokkos::MemoryUnmanaged
|
||||||
|
>
|
||||||
|
result_view( ValueOps::pointer( result_ref )
|
||||||
|
, ValueTraits::value_count( functor )
|
||||||
|
);
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelReduce(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// integral range policy and view output
|
||||||
|
template< class FunctorType , class ViewType >
|
||||||
|
inline
|
||||||
|
void parallel_reduce( const size_t work_count
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const ViewType & result_view
|
||||||
|
, const std::string& str = ""
|
||||||
|
, typename Impl::enable_if<( Impl::is_view<ViewType>::value
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
&& ! Impl::is_same<
|
||||||
|
typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
|
||||||
|
Kokkos::Cuda>::value
|
||||||
|
#endif
|
||||||
|
)>::type * = 0 )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
|
||||||
|
execution_space ;
|
||||||
|
|
||||||
|
typedef RangePolicy< execution_space > ExecPolicy ;
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , ExecPolicy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelReduce(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// integral range policy and pod or array of pod output
|
||||||
|
template< class FunctorType >
|
||||||
|
inline
|
||||||
|
void parallel_reduce( const size_t work_count
|
||||||
|
, const FunctorType & functor
|
||||||
|
, typename Kokkos::Impl::FunctorValueTraits<
|
||||||
|
typename Impl::if_c<Impl::is_execution_policy<FunctorType>::value ||
|
||||||
|
Impl::is_integral<FunctorType>::value,
|
||||||
|
void,FunctorType>::type
|
||||||
|
, void >::reference_type result
|
||||||
|
, const std::string& str = ""
|
||||||
|
, typename Impl::enable_if< true
|
||||||
|
#ifdef KOKKOS_HAVE_CUDA
|
||||||
|
&& ! Impl::is_same<
|
||||||
|
typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
|
||||||
|
Kokkos::Cuda>::value
|
||||||
|
#endif
|
||||||
|
>::type * = 0 )
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
|
||||||
|
execution_space ;
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< execution_space > policy ;
|
||||||
|
|
||||||
|
// Wrap the result output request in a view to inform the implementation
|
||||||
|
// of the type and memory space.
|
||||||
|
|
||||||
|
typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
|
||||||
|
, typename ValueTraits::value_type
|
||||||
|
, typename ValueTraits::pointer_type
|
||||||
|
>::type value_type ;
|
||||||
|
|
||||||
|
Kokkos::View< value_type
|
||||||
|
, HostSpace
|
||||||
|
, Kokkos::MemoryUnmanaged
|
||||||
|
>
|
||||||
|
result_view( ValueOps::pointer( result )
|
||||||
|
, ValueTraits::value_count( functor )
|
||||||
|
);
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelReduce(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Label-first convenience overload with a pointer result.  Forwards to the
// label-last form parallel_reduce(policy,functor,result,str).  When
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is non-zero the forwarded call is
// bracketed by Kokkos::fence() and a start / end line on std::cout.
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
                    , const ExecPolicy  & policy
                    , const FunctorType & functor
                    , ResultType        * result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif

  parallel_reduce( policy , functor , result , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif

  // Explicit no-op use of the label.
  (void) str ;
}
|
||||||
|
|
||||||
|
// Label-first convenience overload with a reference result.  Forwards to the
// label-last form parallel_reduce(policy,functor,result,str).  When
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is non-zero the forwarded call is
// bracketed by Kokkos::fence() and a start / end line on std::cout.
template< class ExecPolicy , class FunctorType , class ResultType >
inline
void parallel_reduce( const std::string & str
                    , const ExecPolicy  & policy
                    , const FunctorType & functor
                    , ResultType        & result)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif

  parallel_reduce( policy , functor , result , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif

  // Explicit no-op use of the label.
  (void) str ;
}
|
||||||
|
|
||||||
|
// Label-first convenience overload without a result argument.  Forwards to
// the label-last form parallel_reduce(policy,functor,str).  When
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is non-zero the forwarded call is
// bracketed by Kokkos::fence() and a start / end line on std::cout.
template< class ExecPolicy , class FunctorType >
inline
void parallel_reduce( const std::string & str
                    , const ExecPolicy  & policy
                    , const FunctorType & functor)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
#endif

  parallel_reduce( policy , functor , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
#endif

  // Explicit no-op use of the label.
  (void) str ;
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/// \fn parallel_scan
|
||||||
|
/// \tparam ExecutionPolicy The execution policy type.
|
||||||
|
/// \tparam FunctorType The scan functor type.
|
||||||
|
///
|
||||||
|
/// \param policy [in] The execution policy.
|
||||||
|
/// \param functor [in] The scan functor.
|
||||||
|
///
|
||||||
|
/// This function implements a parallel scan pattern. The scan can
|
||||||
|
/// be either inclusive or exclusive, depending on how you implement
|
||||||
|
/// the scan functor.
|
||||||
|
///
|
||||||
|
/// A scan functor looks almost exactly like a reduce functor, except
|
||||||
|
/// that its operator() takes a third \c bool argument, \c final_pass,
|
||||||
|
/// which indicates whether this is the last pass of the scan
|
||||||
|
/// operation. We will show below how to use the \c final_pass
|
||||||
|
/// argument to control whether the scan is inclusive or exclusive.
|
||||||
|
///
|
||||||
|
/// Here is the minimum required interface of a scan functor for a POD
|
||||||
|
/// (plain old data) value type \c PodType. That is, the result is a
|
||||||
|
/// View of zero or more PodType. It is also possible for the result
|
||||||
|
/// to be an array of (same-sized) arrays of PodType, but we do not
|
||||||
|
/// show the required interface for that here.
|
||||||
|
/// \code
|
||||||
|
/// template< class ExecPolicy , class FunctorType >
|
||||||
|
/// class ScanFunctor {
|
||||||
|
/// public:
|
||||||
|
/// // The Kokkos device type
|
||||||
|
/// typedef ... execution_space;
|
||||||
|
/// // Type of an entry of the array containing the result;
|
||||||
|
/// // also the type of each of the entries combined using
|
||||||
|
/// // operator() or join().
|
||||||
|
/// typedef PodType value_type;
|
||||||
|
///
|
||||||
|
/// void operator () (const ExecPolicy::member_type & i, value_type& update, const bool final_pass) const;
|
||||||
|
/// void init (value_type& update) const;
|
||||||
|
/// void join (volatile value_type& update, volatile const value_type& input) const;
|
||||||
|
/// };
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
/// Here is an example of a functor which computes an inclusive plus-scan
|
||||||
|
/// of an array of \c int, in place. If given an array [1, 2, 3, 4], this
|
||||||
|
/// scan will overwrite that array with [1, 3, 6, 10].
|
||||||
|
///
|
||||||
|
/// \code
|
||||||
|
/// template<class SpaceType>
|
||||||
|
/// class InclScanFunctor {
|
||||||
|
/// public:
|
||||||
|
/// typedef SpaceType execution_space;
|
||||||
|
/// typedef int value_type;
|
||||||
|
/// typedef typename SpaceType::size_type size_type;
|
||||||
|
///
|
||||||
|
/// InclScanFunctor( Kokkos::View<value_type*, execution_space> x
|
||||||
|
/// , Kokkos::View<value_type*, execution_space> y ) : m_x(x), m_y(y) {}
|
||||||
|
///
|
||||||
|
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
|
||||||
|
/// update += m_x(i);
|
||||||
|
/// if (final_pass) {
|
||||||
|
/// m_y(i) = update;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// void init (value_type& update) const {
|
||||||
|
/// update = 0;
|
||||||
|
/// }
|
||||||
|
/// void join (volatile value_type& update, volatile const value_type& input) const {
|
||||||
|
/// update += input;
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// private:
|
||||||
|
/// Kokkos::View<value_type*, execution_space> m_x;
|
||||||
|
/// Kokkos::View<value_type*, execution_space> m_y;
|
||||||
|
/// };
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
/// Here is an example of a functor which computes an <i>exclusive</i>
|
||||||
|
/// scan of an array of \c int, in place. In operator(), note both
|
||||||
|
/// that the final_pass test and the update have switched places, and
|
||||||
|
/// the use of a temporary. If given an array [1, 2, 3, 4], this scan
|
||||||
|
/// will overwrite that array with [0, 1, 3, 6].
|
||||||
|
///
|
||||||
|
/// \code
|
||||||
|
/// template<class SpaceType>
|
||||||
|
/// class ExclScanFunctor {
|
||||||
|
/// public:
|
||||||
|
/// typedef SpaceType execution_space;
|
||||||
|
/// typedef int value_type;
|
||||||
|
/// typedef typename SpaceType::size_type size_type;
|
||||||
|
///
|
||||||
|
/// ExclScanFunctor (Kokkos::View<value_type*, execution_space> x) : x_ (x) {}
|
||||||
|
///
|
||||||
|
/// void operator () (const size_type i, value_type& update, const bool final_pass) const {
|
||||||
|
/// const value_type x_i = x_(i);
|
||||||
|
/// if (final_pass) {
|
||||||
|
/// x_(i) = update;
|
||||||
|
/// }
|
||||||
|
/// update += x_i;
|
||||||
|
/// }
|
||||||
|
/// void init (value_type& update) const {
|
||||||
|
/// update = 0;
|
||||||
|
/// }
|
||||||
|
/// void join (volatile value_type& update, volatile const value_type& input) const {
|
||||||
|
/// update += input;
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// private:
|
||||||
|
/// Kokkos::View<value_type*, execution_space> x_;
|
||||||
|
/// };
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
/// Here is an example of a functor which builds on the above
|
||||||
|
/// exclusive scan example, to compute an offsets array from a
|
||||||
|
/// population count array, in place. We assume that the pop count
|
||||||
|
/// array has an extra entry at the end to store the final count. If
|
||||||
|
/// given an array [1, 2, 3, 4, 0], this scan will overwrite that
|
||||||
|
/// array with [0, 1, 3, 6, 10].
|
||||||
|
///
|
||||||
|
/// \code
|
||||||
|
/// template<class SpaceType>
|
||||||
|
/// class OffsetScanFunctor {
|
||||||
|
/// public:
|
||||||
|
/// typedef SpaceType execution_space;
|
||||||
|
/// typedef int value_type;
|
||||||
|
/// typedef typename SpaceType::size_type size_type;
|
||||||
|
///
|
||||||
|
/// // last_index_ is the last valid index (zero-based) of x.
|
||||||
|
/// // If x has length zero, then last_index_ won't be used anyway.
|
||||||
|
/// OffsetScanFunctor( Kokkos::View<value_type*, execution_space> x
|
||||||
|
/// , Kokkos::View<value_type*, execution_space> y )
|
||||||
|
/// : m_x(x), m_y(y), last_index_ (x.dimension_0 () == 0 ? 0 : x.dimension_0 () - 1)
|
||||||
|
/// {}
|
||||||
|
///
|
||||||
|
/// void operator () (const size_type i, int& update, const bool final_pass) const {
|
||||||
|
/// if (final_pass) {
|
||||||
|
/// m_y(i) = update;
|
||||||
|
/// }
|
||||||
|
/// update += m_x(i);
|
||||||
|
/// // The last entry of m_y gets the final sum.
|
||||||
|
/// if (final_pass && i == last_index_) {
|
||||||
|
/// m_y(i+1) = update;
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// void init (value_type& update) const {
|
||||||
|
/// update = 0;
|
||||||
|
/// }
|
||||||
|
/// void join (volatile value_type& update, volatile const value_type& input) const {
|
||||||
|
/// update += input;
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// private:
|
||||||
|
/// Kokkos::View<value_type*, execution_space> m_x;
|
||||||
|
/// Kokkos::View<value_type*, execution_space> m_y;
|
||||||
|
/// const size_type last_index_;
|
||||||
|
/// };
|
||||||
|
/// \endcode
|
||||||
|
///
|
||||||
|
template< class ExecutionPolicy , class FunctorType >
|
||||||
|
inline
|
||||||
|
void parallel_scan( const ExecutionPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const std::string& str = ""
|
||||||
|
, typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0
|
||||||
|
)
|
||||||
|
{
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Impl::ParallelScan< FunctorType , ExecutionPolicy > scan( Impl::CopyWithoutTracking::apply(functor) , policy );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelScan(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
inline
|
||||||
|
void parallel_scan( const size_t work_count
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const std::string& str = "" )
|
||||||
|
{
|
||||||
|
typedef typename
|
||||||
|
Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
|
||||||
|
execution_space ;
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< execution_space > policy ;
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
uint64_t kpID = 0;
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
(void) Impl::ParallelScan< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) );
|
||||||
|
|
||||||
|
#ifdef KOKKOSP_ENABLE_PROFILING
|
||||||
|
if(Kokkos::Experimental::profileLibraryLoaded()) {
|
||||||
|
Kokkos::Experimental::endParallelScan(kpID);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Label-first convenience overload.  Forwards to the label-last form
// parallel_scan(policy,functor,str).  When
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES is non-zero the forwarded call is
// bracketed by Kokkos::fence() and a start / end line on std::cout.
template< class ExecutionPolicy , class FunctorType >
inline
void parallel_scan( const std::string     & str
                  , const ExecutionPolicy & policy
                  , const FunctorType     & functor)
{
#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
#endif

  parallel_scan( policy , functor , str );

#if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
  Kokkos::fence();
  std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
#endif

  // Explicit no-op use of the label.
  (void) str ;
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
// Primary template: used when no specialization on the 'Enable' parameter
// applies.  Reports a size of zero and ignores both arguments.
template< class FunctorType , class Enable = void >
struct FunctorTeamShmemSize
{
  static inline
  size_t value( const FunctorType & /* functor */ , int /* team_size */ )
  {
    return 0 ;
  }
};
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type >
|
||||||
|
{
|
||||||
|
static inline size_t value( const FunctorType & f , int team_size ) { return f.team_shmem_size( team_size ) ; }
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
|
||||||
|
{
|
||||||
|
static inline size_t value( const FunctorType & f , int team_size ) { return f.shmem_size( team_size ) ; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* KOKKOS_PARALLEL_HPP */
|
||||||
|
|
||||||
165
lib/kokkos/core/src/Kokkos_Qthread.hpp
Executable file
165
lib/kokkos/core/src/Kokkos_Qthread.hpp
Executable file
@ -0,0 +1,165 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_QTHREAD_HPP
|
||||||
|
#define KOKKOS_QTHREAD_HPP
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_Layout.hpp>
|
||||||
|
#include <Kokkos_MemoryTraits.hpp>
|
||||||
|
#include <Kokkos_HostSpace.hpp>
|
||||||
|
#include <Kokkos_ExecPolicy.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
namespace Impl {

// Forward declaration only; the class is defined elsewhere.
class QthreadExec ;

} // namespace Impl
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Execution space supported by Qthread */
|
||||||
|
class Qthread {
|
||||||
|
public:
|
||||||
|
//! \name Type declarations that all Kokkos devices must provide.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
//! Tag this class as an execution space
|
||||||
|
typedef Qthread execution_space ;
|
||||||
|
typedef Kokkos::HostSpace memory_space ;
|
||||||
|
//! This execution space preferred device_type
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
typedef Kokkos::LayoutRight array_layout ;
|
||||||
|
typedef memory_space::size_type size_type ;
|
||||||
|
|
||||||
|
typedef ScratchMemorySpace< Qthread > scratch_memory_space ;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/** \brief Initialization will construct one or more instances */
|
||||||
|
static Qthread & instance( int = 0 );
|
||||||
|
|
||||||
|
/** \brief Set the execution space to a "sleep" state.
|
||||||
|
*
|
||||||
|
* This function sets the "sleep" state in which it is not ready for work.
|
||||||
|
* This may consume less resources than in an "ready" state,
|
||||||
|
* but it may also take time to transition to the "ready" state.
|
||||||
|
*
|
||||||
|
* \return True if enters or is in the "sleep" state.
|
||||||
|
* False if functions are currently executing.
|
||||||
|
*/
|
||||||
|
bool sleep();
|
||||||
|
|
||||||
|
/** \brief Wake from the sleep state.
|
||||||
|
*
|
||||||
|
* \return True if enters or is in the "ready" state.
|
||||||
|
* False if functions are currently executing.
|
||||||
|
*/
|
||||||
|
static bool wake();
|
||||||
|
|
||||||
|
/** \brief Wait until all dispatched functions to complete.
|
||||||
|
*
|
||||||
|
* The parallel_for or parallel_reduce dispatch of a functor may
|
||||||
|
* return asynchronously, before the functor completes. This
|
||||||
|
* method does not return until all dispatched functors on this
|
||||||
|
* device have completed.
|
||||||
|
*/
|
||||||
|
static void fence();
|
||||||
|
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
static void initialize( int thread_count );
|
||||||
|
static void finalize();
|
||||||
|
|
||||||
|
/** \brief Print configuration information to the given output stream. */
|
||||||
|
static void print_configuration( std::ostream & , const bool detail = false );
|
||||||
|
|
||||||
|
int shepherd_size() const ;
|
||||||
|
int shepherd_worker_size() const ;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace
|
||||||
|
< Kokkos::Qthread::memory_space
|
||||||
|
, Kokkos::Qthread::scratch_memory_space
|
||||||
|
>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
inline static void verify( void ) { }
|
||||||
|
inline static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <Qthread/Kokkos_QthreadExec.hpp>
|
||||||
|
#include <Qthread/Kokkos_Qthread_Parallel.hpp>
|
||||||
|
|
||||||
|
#endif /* #define KOKKOS_QTHREAD_HPP */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
125
lib/kokkos/core/src/Kokkos_ScratchSpace.hpp
Executable file
125
lib/kokkos/core/src/Kokkos_ScratchSpace.hpp
Executable file
@ -0,0 +1,125 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_SCRATCHSPACE_HPP
|
||||||
|
#define KOKKOS_SCRATCHSPACE_HPP
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Scratch memory space associated with an execution space.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template< class ExecSpace >
|
||||||
|
class ScratchMemorySpace {
|
||||||
|
public:
|
||||||
|
|
||||||
|
// Alignment of memory chunks returned by 'get'
|
||||||
|
// must be a power of two
|
||||||
|
enum { ALIGN = 8 };
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
mutable char * m_iter ;
|
||||||
|
char * m_end ;
|
||||||
|
|
||||||
|
ScratchMemorySpace();
|
||||||
|
ScratchMemorySpace & operator = ( const ScratchMemorySpace & );
|
||||||
|
|
||||||
|
enum { MASK = ALIGN - 1 }; // Alignment used by View::shmem_size
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a memory space
|
||||||
|
typedef ScratchMemorySpace memory_space ;
|
||||||
|
typedef ExecSpace execution_space ;
|
||||||
|
//! This execution space preferred device_type
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
typedef typename ExecSpace::array_layout array_layout ;
|
||||||
|
typedef typename ExecSpace::size_type size_type ;
|
||||||
|
|
||||||
|
template< typename IntType >
|
||||||
|
KOKKOS_INLINE_FUNCTION static
|
||||||
|
IntType align( const IntType & size )
|
||||||
|
{ return ( size + MASK ) & ~MASK ; }
|
||||||
|
|
||||||
|
template< typename IntType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void* get_shmem (const IntType& size) const {
|
||||||
|
void* tmp = m_iter ;
|
||||||
|
if (m_end < (m_iter += align (size))) {
|
||||||
|
m_iter -= align (size); // put it back like it was
|
||||||
|
#ifdef KOKKOS_HAVE_DEBUG
|
||||||
|
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||||
|
// in a CUDA build, so only print in debug mode. The
|
||||||
|
// function still returns NULL if not enough memory.
|
||||||
|
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
|
||||||
|
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
|
||||||
|
long(m_end-m_iter));
|
||||||
|
#endif // KOKKOS_HAVE_DEBUG
|
||||||
|
tmp = 0;
|
||||||
|
}
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename IntType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
ScratchMemorySpace( void * ptr , const IntType & size )
|
||||||
|
: m_iter( (char *) ptr )
|
||||||
|
, m_end( m_iter + size )
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_SCRATCHSPACE_HPP */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
892
lib/kokkos/core/src/Kokkos_Serial.hpp
Executable file
892
lib/kokkos/core/src/Kokkos_Serial.hpp
Executable file
@ -0,0 +1,892 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// \file Kokkos_Serial.hpp
|
||||||
|
/// \brief Declaration and definition of Kokkos::Serial device.
|
||||||
|
|
||||||
|
#ifndef KOKKOS_SERIAL_HPP
|
||||||
|
#define KOKKOS_SERIAL_HPP
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <Kokkos_Layout.hpp>
|
||||||
|
#include <Kokkos_HostSpace.hpp>
|
||||||
|
#include <Kokkos_ScratchSpace.hpp>
|
||||||
|
#include <Kokkos_MemoryTraits.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_SERIAL )
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/// \class Serial
|
||||||
|
/// \brief Kokkos device for non-parallel execution
|
||||||
|
///
|
||||||
|
/// A "device" represents a parallel execution model. It tells Kokkos
|
||||||
|
/// how to parallelize the execution of kernels in a parallel_for or
|
||||||
|
/// parallel_reduce. For example, the Threads device uses Pthreads or
|
||||||
|
/// C++11 threads on a CPU, the OpenMP device uses the OpenMP language
|
||||||
|
/// extensions, and the Cuda device uses NVIDIA's CUDA programming
|
||||||
|
/// model. The Serial device executes "parallel" kernels
|
||||||
|
/// sequentially. This is useful if you really do not want to use
|
||||||
|
/// threads, or if you want to explore different combinations of MPI
|
||||||
|
/// and shared-memory parallel programming models.
|
||||||
|
class Serial {
|
||||||
|
public:
|
||||||
|
//! \name Type declarations that all Kokkos devices must provide.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
//! Tag this class as an execution space:
|
||||||
|
typedef Serial execution_space ;
|
||||||
|
//! The size_type typedef best suited for this device.
|
||||||
|
typedef HostSpace::size_type size_type ;
|
||||||
|
//! This device's preferred memory space.
|
||||||
|
typedef HostSpace memory_space ;
|
||||||
|
//! This execution space preferred device_type
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
//! This device's preferred array layout.
|
||||||
|
typedef LayoutRight array_layout ;
|
||||||
|
|
||||||
|
/// \brief Scratch memory space
|
||||||
|
typedef ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
|
||||||
|
|
||||||
|
//@}
|
||||||
|
|
||||||
|
/// \brief True if and only if this method is being called in a
|
||||||
|
/// thread-parallel function.
|
||||||
|
///
|
||||||
|
/// For the Serial device, this method <i>always</i> returns false,
|
||||||
|
/// because parallel_for or parallel_reduce with the Serial device
|
||||||
|
/// always execute sequentially.
|
||||||
|
inline static int in_parallel() { return false ; }
|
||||||
|
|
||||||
|
/** \brief Set the device in a "sleep" state.
|
||||||
|
*
|
||||||
|
* This function sets the device in a "sleep" state in which it is
|
||||||
|
* not ready for work. This may consume less resources than if the
|
||||||
|
* device were in an "awake" state, but it may also take time to
|
||||||
|
* bring the device from a sleep state to be ready for work.
|
||||||
|
*
|
||||||
|
* \return True if the device is in the "sleep" state, else false if
|
||||||
|
* the device is actively working and could not enter the "sleep"
|
||||||
|
* state.
|
||||||
|
*/
|
||||||
|
static bool sleep();
|
||||||
|
|
||||||
|
/// \brief Wake the device from the 'sleep' state so it is ready for work.
|
||||||
|
///
|
||||||
|
/// \return True if the device is in the "ready" state, else "false"
|
||||||
|
/// if the device is actively working (which also means that it's
|
||||||
|
/// awake).
|
||||||
|
static bool wake();
|
||||||
|
|
||||||
|
/// \brief Wait until all dispatched functors complete.
|
||||||
|
///
|
||||||
|
/// The parallel_for or parallel_reduce dispatch of a functor may
|
||||||
|
/// return asynchronously, before the functor completes. This
|
||||||
|
/// method does not return until all dispatched functors on this
|
||||||
|
/// device have completed.
|
||||||
|
static void fence() {}
|
||||||
|
|
||||||
|
static void initialize( unsigned threads_count = 1 ,
|
||||||
|
unsigned use_numa_count = 0 ,
|
||||||
|
unsigned use_cores_per_numa = 0 ,
|
||||||
|
bool allow_asynchronous_threadpool = false) {
|
||||||
|
(void) threads_count;
|
||||||
|
(void) use_numa_count;
|
||||||
|
(void) use_cores_per_numa;
|
||||||
|
(void) allow_asynchronous_threadpool;
|
||||||
|
|
||||||
|
// Init the array of locks used for arbitrarily sized atomics
|
||||||
|
Impl::init_lock_array_host_space();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static int is_initialized() { return 1 ; }
|
||||||
|
|
||||||
|
//! Free any resources being consumed by the device.
|
||||||
|
static void finalize() {}
|
||||||
|
|
||||||
|
//! Print configuration information to the given output stream.
|
||||||
|
static void print_configuration( std::ostream & , const bool detail = false ) {}
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
inline static int thread_pool_size( int = 0 ) { return 1 ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; }
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); }
|
||||||
|
inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size );
|
||||||
|
|
||||||
|
//--------------------------------------------------------------------------
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace
|
||||||
|
< Kokkos::Serial::memory_space
|
||||||
|
, Kokkos::Serial::scratch_memory_space
|
||||||
|
>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
inline static void verify( void ) { }
|
||||||
|
inline static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace SerialImpl {

/** \brief  Bookkeeping record for the Serial device's scratch memory.
 *
 *  Only declarations live here; constructor, destructor and
 *  singleton() are defined elsewhere.
 */
struct Sentinel {

  Sentinel();
  ~Sentinel();

  //! Access to the one shared instance.
  static Sentinel & singleton();

  void *   m_scratch ;     // scratch buffer -- presumably owned by this object; confirm in the definition
  unsigned m_reduce_end ;  // NOTE(review): looks like the end offset of the reduction part; confirm
  unsigned m_shared_end ;  // NOTE(review): looks like the end offset of the team-shared part; confirm
};

//! Declared here, defined elsewhere.
inline unsigned align( unsigned n );

}
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
class SerialTeamMember {
|
||||||
|
private:
|
||||||
|
typedef Kokkos::ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
|
||||||
|
const scratch_memory_space m_space ;
|
||||||
|
const int m_league_rank ;
|
||||||
|
const int m_league_size ;
|
||||||
|
|
||||||
|
SerialTeamMember & operator = ( const SerialTeamMember & );
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
const scratch_memory_space & team_shmem() const { return m_space ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION void team_barrier() const {}
|
||||||
|
|
||||||
|
template<class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void team_broadcast(const ValueType& , const int& ) const {}
|
||||||
|
|
||||||
|
template< class ValueType, class JoinOp >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
ValueType team_reduce( const ValueType & value , const JoinOp & ) const
|
||||||
|
{
|
||||||
|
return value ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
|
||||||
|
* with intra-team non-deterministic ordering accumulation.
|
||||||
|
*
|
||||||
|
* The global inter-team accumulation value will, at the end of the
|
||||||
|
* league's parallel execution, be the scan's total.
|
||||||
|
* Parallel execution ordering of the league's teams is non-deterministic.
|
||||||
|
* As such the base value for each team's scan operation is similarly
|
||||||
|
* non-deterministic.
|
||||||
|
*/
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const
|
||||||
|
{
|
||||||
|
const Type tmp = global_accum ? *global_accum : Type(0) ;
|
||||||
|
if ( global_accum ) { *global_accum += value ; }
|
||||||
|
return tmp ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
|
||||||
|
*
|
||||||
|
* The highest rank thread can compute the reduction total as
|
||||||
|
* reduction_total = dev.team_scan( value ) + value ;
|
||||||
|
*/
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & ) const
|
||||||
|
{ return Type(0); }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
// Execution space specific:
|
||||||
|
|
||||||
|
SerialTeamMember( int arg_league_rank
|
||||||
|
, int arg_league_size
|
||||||
|
, int arg_shared_size
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* < Kokkos::Serial , WorkArgTag >
|
||||||
|
* < WorkArgTag , Impl::enable_if< Impl::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type >
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template< class Arg0 , class Arg1 >
|
||||||
|
class TeamPolicy< Arg0 , Arg1 , Kokkos::Serial >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
const int m_league_size ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos execution policy
|
||||||
|
typedef TeamPolicy execution_policy ;
|
||||||
|
|
||||||
|
//! Execution space of this execution policy:
|
||||||
|
typedef Kokkos::Serial execution_space ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::if_c< ! Impl::is_same< Kokkos::Serial , Arg0 >::value , Arg0 , Arg1 >::type
|
||||||
|
work_tag ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
static
|
||||||
|
int team_size_max( const FunctorType & ) { return 1 ; }
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
static
|
||||||
|
int team_size_recommended( const FunctorType & ) { return 1 ; }
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
static
|
||||||
|
int team_size_recommended( const FunctorType & , const int& ) { return 1 ; }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
inline int team_size() const { return 1 ; }
|
||||||
|
inline int league_size() const { return m_league_size ; }
|
||||||
|
|
||||||
|
/** \brief Specify league size, request team size */
|
||||||
|
TeamPolicy( execution_space & , int league_size_request , int /* team_size_request */ , int vector_length_request = 1 )
|
||||||
|
: m_league_size( league_size_request )
|
||||||
|
{ (void) vector_length_request; }
|
||||||
|
|
||||||
|
TeamPolicy( int league_size_request , int /* team_size_request */ , int vector_length_request = 1 )
|
||||||
|
: m_league_size( league_size_request )
|
||||||
|
{ (void) vector_length_request; }
|
||||||
|
|
||||||
|
typedef Impl::SerialTeamMember member_type ;
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
// work tag is void
|
||||||
|
template< class PType >
|
||||||
|
inline
|
||||||
|
ParallelFor( typename Impl::enable_if<
|
||||||
|
( Impl::is_same< PType , Policy >::value &&
|
||||||
|
Impl::is_same< typename PType::work_tag , void >::value
|
||||||
|
), const FunctorType & >::type functor
|
||||||
|
, const PType & policy )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = policy.end();
|
||||||
|
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
|
||||||
|
functor( i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// work tag is non-void
|
||||||
|
template< class PType >
|
||||||
|
inline
|
||||||
|
ParallelFor( typename Impl::enable_if<
|
||||||
|
( Impl::is_same< PType , Policy >::value &&
|
||||||
|
! Impl::is_same< typename PType::work_tag , void >::value
|
||||||
|
), const FunctorType & >::type functor
|
||||||
|
, const PType & policy )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = policy.end();
|
||||||
|
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
|
||||||
|
functor( typename PType::work_tag() , i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
|
||||||
|
typedef typename Policy::work_tag WorkTag ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
// Work tag is void
|
||||||
|
template< class ViewType , class PType >
|
||||||
|
ParallelReduce( typename Impl::enable_if<
|
||||||
|
( Impl::is_view< ViewType >::value &&
|
||||||
|
Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
|
||||||
|
Impl::is_same< PType , Policy >::value &&
|
||||||
|
Impl::is_same< typename PType::work_tag , void >::value
|
||||||
|
), const FunctorType & >::type functor
|
||||||
|
, const PType & policy
|
||||||
|
, const ViewType & result
|
||||||
|
)
|
||||||
|
{
|
||||||
|
pointer_type result_ptr = result.ptr_on_device();
|
||||||
|
|
||||||
|
if ( ! result_ptr ) {
|
||||||
|
result_ptr = (pointer_type)
|
||||||
|
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
reference_type update = ValueInit::init( functor , result_ptr );
|
||||||
|
|
||||||
|
const typename PType::member_type e = policy.end();
|
||||||
|
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
|
||||||
|
functor( i , update );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Work tag is non-void
|
||||||
|
template< class ViewType , class PType >
|
||||||
|
ParallelReduce( typename Impl::enable_if<
|
||||||
|
( Impl::is_view< ViewType >::value &&
|
||||||
|
Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
|
||||||
|
Impl::is_same< PType , Policy >::value &&
|
||||||
|
! Impl::is_same< typename PType::work_tag , void >::value
|
||||||
|
), const FunctorType & >::type functor
|
||||||
|
, const PType & policy
|
||||||
|
, const ViewType & result
|
||||||
|
)
|
||||||
|
{
|
||||||
|
pointer_type result_ptr = result.ptr_on_device();
|
||||||
|
|
||||||
|
if ( ! result_ptr ) {
|
||||||
|
result_ptr = (pointer_type)
|
||||||
|
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
typename ValueTraits::reference_type update = ValueInit::init( functor , result_ptr );
|
||||||
|
|
||||||
|
const typename PType::member_type e = policy.end();
|
||||||
|
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
|
||||||
|
functor( typename PType::work_tag() , i , update );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > Policy ;
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
// work tag is void
|
||||||
|
template< class PType >
|
||||||
|
inline
|
||||||
|
ParallelScan( typename Impl::enable_if<
|
||||||
|
( Impl::is_same< PType , Policy >::value &&
|
||||||
|
Impl::is_same< typename PType::work_tag , void >::value
|
||||||
|
), const FunctorType & >::type functor
|
||||||
|
, const PType & policy )
|
||||||
|
{
|
||||||
|
pointer_type result_ptr = (pointer_type)
|
||||||
|
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
|
||||||
|
|
||||||
|
reference_type update = ValueInit::init( functor , result_ptr );
|
||||||
|
|
||||||
|
const typename PType::member_type e = policy.end();
|
||||||
|
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
|
||||||
|
functor( i , update , true );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
|
||||||
|
}
|
||||||
|
|
||||||
|
// work tag is non-void
|
||||||
|
template< class PType >
|
||||||
|
inline
|
||||||
|
ParallelScan( typename Impl::enable_if<
|
||||||
|
( Impl::is_same< PType , Policy >::value &&
|
||||||
|
! Impl::is_same< typename PType::work_tag , void >::value
|
||||||
|
), const FunctorType & >::type functor
|
||||||
|
, const PType & policy )
|
||||||
|
{
|
||||||
|
pointer_type result_ptr = (pointer_type)
|
||||||
|
Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
|
||||||
|
|
||||||
|
reference_type update = ValueInit::init( functor , result_ptr );
|
||||||
|
|
||||||
|
const typename PType::member_type e = policy.end();
|
||||||
|
for ( typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
|
||||||
|
functor( typename PType::work_tag() , i , update , true );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > Policy ;
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename Policy::member_type & member )
|
||||||
|
{ functor( member ); }
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename Policy::member_type & member )
|
||||||
|
{ functor( TagType() , member ); }
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
ParallelFor( const FunctorType & functor
|
||||||
|
, const Policy & policy )
|
||||||
|
{
|
||||||
|
const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
|
||||||
|
|
||||||
|
Kokkos::Serial::scratch_memory_resize( 0 , shared_size );
|
||||||
|
|
||||||
|
for ( int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
|
||||||
|
ParallelFor::template driver< typename Policy::work_tag >
|
||||||
|
( functor , typename Policy::member_type(ileague,policy.league_size(),shared_size) );
|
||||||
|
// functor( typename Policy::member_type(ileague,policy.league_size(),shared_size) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > Policy ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename Policy::member_type & member
|
||||||
|
, reference_type update )
|
||||||
|
{ functor( member , update ); }
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename Policy::member_type & member
|
||||||
|
, reference_type update )
|
||||||
|
{ functor( TagType() , member , update ); }
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
template< class ViewType >
|
||||||
|
ParallelReduce( const FunctorType & functor
|
||||||
|
, const Policy & policy
|
||||||
|
, const ViewType & result
|
||||||
|
)
|
||||||
|
{
|
||||||
|
const int reduce_size = ValueTraits::value_size( functor );
|
||||||
|
const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
|
||||||
|
void * const scratch_reduce = Kokkos::Serial::scratch_memory_resize( reduce_size , shared_size );
|
||||||
|
|
||||||
|
const pointer_type result_ptr =
|
||||||
|
result.ptr_on_device() ? result.ptr_on_device()
|
||||||
|
: (pointer_type) scratch_reduce ;
|
||||||
|
|
||||||
|
reference_type update = ValueInit::init( functor , result_ptr );
|
||||||
|
|
||||||
|
for ( int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
|
||||||
|
ParallelReduce::template driver< typename Policy::work_tag >
|
||||||
|
( functor , typename Policy::member_type(ileague,policy.league_size(),shared_size) , update );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> {
|
||||||
|
typedef iType index_type;
|
||||||
|
const iType begin ;
|
||||||
|
const iType end ;
|
||||||
|
enum {increment = 1};
|
||||||
|
const SerialTeamMember& thread;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_count)
|
||||||
|
: begin(0)
|
||||||
|
, end(arg_count)
|
||||||
|
, thread(arg_thread)
|
||||||
|
{}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_begin, const iType & arg_end )
|
||||||
|
: begin( arg_begin )
|
||||||
|
, end( arg_end)
|
||||||
|
, thread( arg_thread )
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> {
|
||||||
|
typedef iType index_type;
|
||||||
|
enum {start = 0};
|
||||||
|
const iType end;
|
||||||
|
enum {increment = 1};
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
ThreadVectorRangeBoundariesStruct (const SerialTeamMember& thread, const iType& count):
|
||||||
|
end( count )
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
|
||||||
|
TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count )
|
||||||
|
{
|
||||||
|
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
|
||||||
|
TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & begin , const iType & end )
|
||||||
|
{
|
||||||
|
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,begin,end);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >
|
||||||
|
ThreadVectorRange(const Impl::SerialTeamMember& thread, const iType& count) {
|
||||||
|
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >(thread,count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wraps the team member in the tag type consumed by the
// single( ThreadSingleStruct , ... ) overloads.
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam( const Impl::SerialTeamMember& thread )
{
  typedef Impl::ThreadSingleStruct<Impl::SerialTeamMember> single_type ;

  return single_type( thread );
}
|
||||||
|
|
||||||
|
// Wraps the team member in the tag type consumed by the
// single( VectorSingleStruct , ... ) overloads.
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread( const Impl::SerialTeamMember& thread )
{
  typedef Impl::VectorSingleStruct<Impl::SerialTeamMember> single_type ;

  return single_type( thread );
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all threads of the the calling thread team.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template<typename iType, class Lambda>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, const Lambda& lambda) {
|
||||||
|
for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||||
|
lambda(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of
|
||||||
|
* val is performed and put into result. This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
|
||||||
|
const Lambda & lambda, ValueType& result) {
|
||||||
|
|
||||||
|
result = ValueType();
|
||||||
|
|
||||||
|
for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
result+=tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_CXX11
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of
|
||||||
|
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
|
||||||
|
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
|
||||||
|
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
|
||||||
|
* '1 for *'). This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
|
||||||
|
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||||
|
|
||||||
|
ValueType result = init_result;
|
||||||
|
|
||||||
|
for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
join(result,tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // KOKKOS_HAVE_CXX11
|
||||||
|
|
||||||
|
} //namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template<typename iType, class Lambda>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||||
|
loop_boundaries, const Lambda& lambda) {
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||||
|
lambda(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of
|
||||||
|
* val is performed and put into result. This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||||
|
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||||
|
result = ValueType();
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
result+=tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of
|
||||||
|
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
|
||||||
|
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
|
||||||
|
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
|
||||||
|
* '1 for *'). This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||||
|
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||||
|
|
||||||
|
ValueType result = init_result;
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
join(result,tmp);
|
||||||
|
}
|
||||||
|
init_result = result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
|
||||||
|
* for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
|
||||||
|
* Depending on the target execution space the operator might be called twice: once with final=false
|
||||||
|
* and once with final=true. When final==true val contains the prefix sum value. The contribution of this
|
||||||
|
* "i" needs to be added to val no matter whether final==true or not. In a serial execution
|
||||||
|
* (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set
|
||||||
|
* to the final sum value over all vector lanes.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class FunctorType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||||
|
loop_boundaries, const FunctorType & lambda) {
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
|
||||||
|
typedef typename ValueTraits::value_type value_type ;
|
||||||
|
|
||||||
|
value_type scan_val = value_type();
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
lambda(i,scan_val,true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template<class FunctorType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) {
|
||||||
|
lambda();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) {
|
||||||
|
lambda();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType, class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) {
|
||||||
|
lambda(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType, class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) {
|
||||||
|
lambda(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||||
|
#endif /* #define KOKKOS_SERIAL_HPP */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
376
lib/kokkos/core/src/Kokkos_TaskPolicy.hpp
Executable file
376
lib/kokkos/core/src/Kokkos_TaskPolicy.hpp
Executable file
@ -0,0 +1,376 @@
|
|||||||
|
|
||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Experimental unified task-data parallel manycore LDRD
|
||||||
|
|
||||||
|
#ifndef KOKKOS_TASKPOLICY_HPP
|
||||||
|
#define KOKKOS_TASKPOLICY_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
#include <impl/Kokkos_StaticAssert.hpp>
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
struct FutureValueTypeIsVoidError {};
|
||||||
|
|
||||||
|
template < class ExecSpace , class ResultType , class FunctorType >
|
||||||
|
class TaskMember ;
|
||||||
|
|
||||||
|
template< class ExecPolicy , class ResultType , class FunctorType >
|
||||||
|
class TaskForEach ;
|
||||||
|
|
||||||
|
template< class ExecPolicy , class ResultType , class FunctorType >
|
||||||
|
class TaskReduce ;
|
||||||
|
|
||||||
|
template< class ExecPolicy , class ResultType , class FunctorType >
|
||||||
|
struct TaskScan ;
|
||||||
|
|
||||||
|
} /* namespace Impl */
|
||||||
|
} /* namespace Experimental */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
|
||||||
|
/**\brief  States of a task.
 *
 *  TASK_STATE_NULL is zero; every other state occupies its own bit.
 */
enum TaskState
{
  TASK_STATE_NULL         = 0 ,       ///<  Does not exist
  TASK_STATE_CONSTRUCTING = 1 << 0 ,  ///<  Is under construction
  TASK_STATE_WAITING      = 1 << 1 ,  ///<  Is waiting for execution
  TASK_STATE_EXECUTING    = 1 << 2 ,  ///<  Is executing
  TASK_STATE_COMPLETE     = 1 << 3    ///<  Execution is complete
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* Future< space > // value_type == void
|
||||||
|
* Future< value > // space == Default
|
||||||
|
* Future< value , space >
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template< class Arg1 = void , class Arg2 = void >
|
||||||
|
class Future {
|
||||||
|
private:
|
||||||
|
|
||||||
|
template< class , class , class > friend class Impl::TaskMember ;
|
||||||
|
template< class > friend class TaskPolicy ;
|
||||||
|
template< class , class > friend class Future ;
|
||||||
|
|
||||||
|
// Argument #2, if not void, must be the space.
|
||||||
|
enum { Arg1_is_space = Kokkos::Impl::is_execution_space< Arg1 >::value };
|
||||||
|
enum { Arg2_is_space = Kokkos::Impl::is_execution_space< Arg2 >::value };
|
||||||
|
enum { Arg2_is_void = Kokkos::Impl::is_same< Arg2 , void >::value };
|
||||||
|
|
||||||
|
struct ErrorNoExecutionSpace {};
|
||||||
|
|
||||||
|
enum { Opt1 = Arg1_is_space && Arg2_is_void
|
||||||
|
, Opt2 = ! Arg1_is_space && Arg2_is_void
|
||||||
|
, Opt3 = ! Arg1_is_space && Arg2_is_space
|
||||||
|
, OptOK = Kokkos::Impl::StaticAssert< Opt1 || Opt2 || Opt3 , ErrorNoExecutionSpace >::value
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Kokkos::Impl::if_c< Opt2 || Opt3 , Arg1 , void >::type
|
||||||
|
ValueType ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Kokkos::Impl::if_c< Opt1 , Arg1 , typename
|
||||||
|
Kokkos::Impl::if_c< Opt2 , Kokkos::DefaultExecutionSpace , typename
|
||||||
|
Kokkos::Impl::if_c< Opt3 , Arg2 , void
|
||||||
|
>::type >::type >::type
|
||||||
|
ExecutionSpace ;
|
||||||
|
|
||||||
|
typedef Impl::TaskMember< ExecutionSpace , void , void > TaskRoot ;
|
||||||
|
typedef Impl::TaskMember< ExecutionSpace , ValueType , void > TaskValue ;
|
||||||
|
|
||||||
|
TaskRoot * m_task ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef ValueType value_type;
|
||||||
|
typedef ExecutionSpace execution_space ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
TaskState get_task_state() const
|
||||||
|
{ return 0 != m_task ? m_task->get_state() : TASK_STATE_NULL ; }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
explicit
|
||||||
|
Future( TaskRoot * task )
|
||||||
|
: m_task(0)
|
||||||
|
{ TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( task ) ); }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
~Future() { TaskRoot::assign( & m_task , 0 ); }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Future() : m_task(0) {}
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Future( const Future & rhs )
|
||||||
|
: m_task(0)
|
||||||
|
{ TaskRoot::assign( & m_task , rhs.m_task ); }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Future & operator = ( const Future & rhs )
|
||||||
|
{ TaskRoot::assign( & m_task , rhs.m_task ); return *this ; }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
template< class A1 , class A2 >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Future( const Future<A1,A2> & rhs )
|
||||||
|
: m_task(0)
|
||||||
|
{ TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); }
|
||||||
|
|
||||||
|
template< class A1 , class A2 >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Future & operator = ( const Future<A1,A2> & rhs )
|
||||||
|
{ TaskRoot::assign( & m_task , TaskRoot::template verify_type< value_type >( rhs.m_task ) ); return *this ; }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
typedef typename TaskValue::get_result_type get_result_type ;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
get_result_type get() const
|
||||||
|
{ return static_cast<TaskValue*>( m_task )->get(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class T >
|
||||||
|
struct is_future : public Kokkos::Impl::bool_< false > {};
|
||||||
|
|
||||||
|
template< class Arg0 , class Arg1 >
|
||||||
|
struct is_future< Kokkos::Experimental::Future<Arg0,Arg1> > : public Kokkos::Impl::bool_< true > {};
|
||||||
|
|
||||||
|
} /* namespace Impl */
|
||||||
|
} /* namespace Experimental */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Experimental {
|
||||||
|
|
||||||
|
/** \brief If the argument is an execution space then a serial task in that space */
|
||||||
|
template< class Arg0 = Kokkos::DefaultExecutionSpace >
|
||||||
|
class TaskPolicy {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef typename Arg0::execution_space execution_space ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
/** \brief Create a serial task with storage for dependences.
|
||||||
|
*
|
||||||
|
* Postcondition: Task is in the 'constructing' state.
|
||||||
|
*/
|
||||||
|
template< class FunctorType >
|
||||||
|
Future< typename FunctorType::value_type , execution_space >
|
||||||
|
create( const FunctorType & functor
|
||||||
|
, const unsigned dependence_capacity /* = default */ ) const ;
|
||||||
|
|
||||||
|
/** \brief Create a foreach task with storage for dependences. */
|
||||||
|
template< class ExecPolicy , class FunctorType >
|
||||||
|
Future< typename FunctorType::value_type , execution_space >
|
||||||
|
create_foreach( const ExecPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const unsigned dependence_capacity /* = default */ ) const ;
|
||||||
|
|
||||||
|
/** \brief Create a reduce task with storage for dependences. */
|
||||||
|
template< class ExecPolicy , class FunctorType >
|
||||||
|
Future< typename FunctorType::value_type , execution_space >
|
||||||
|
create_reduce( const ExecPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const unsigned dependence_capacity /* = default */ ) const ;
|
||||||
|
|
||||||
|
/** \brief Create a scan task with storage for dependences. */
|
||||||
|
template< class ExecPolicy , class FunctorType >
|
||||||
|
Future< typename FunctorType::value_type , execution_space >
|
||||||
|
create_scan( const ExecPolicy & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const unsigned dependence_capacity /* = default */ ) const ;
|
||||||
|
|
||||||
|
/** \brief Set dependence that 'after' cannot start execution
|
||||||
|
* until 'before' has completed.
|
||||||
|
*
|
||||||
|
* Precondition: The 'after' task must be in then 'Constructing' state.
|
||||||
|
*/
|
||||||
|
template< class TA , class TB >
|
||||||
|
void set_dependence( const Future<TA,execution_space> & after
|
||||||
|
, const Future<TB,execution_space> & before ) const ;
|
||||||
|
|
||||||
|
/** \brief Spawn a task in the 'Constructing' state
|
||||||
|
*
|
||||||
|
* Precondition: Task is in the 'constructing' state.
|
||||||
|
* Postcondition: Task is waiting, executing, or complete.
|
||||||
|
*/
|
||||||
|
template< class T >
|
||||||
|
const Future<T,execution_space> &
|
||||||
|
spawn( const Future<T,execution_space> & ) const ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
/** \brief Query dependence of an executing task */
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
Future< execution_space >
|
||||||
|
get_dependence( FunctorType * , const int ) const ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
/** \brief Clear current dependences of an executing task
|
||||||
|
* in preparation for setting new dependences and
|
||||||
|
* respawning.
|
||||||
|
*
|
||||||
|
* Precondition: The functor must be a task in the executing state.
|
||||||
|
*/
|
||||||
|
template< class FunctorType >
|
||||||
|
void clear_dependence( FunctorType * ) const ;
|
||||||
|
|
||||||
|
/** \brief Set dependence that 'after' cannot start execution
|
||||||
|
* until 'before' has completed.
|
||||||
|
*
|
||||||
|
* The 'after' functor must be in the executing state
|
||||||
|
*/
|
||||||
|
template< class FunctorType , class TB >
|
||||||
|
void set_dependence( FunctorType * after
|
||||||
|
, const Future<TB,execution_space> & before ) const ;
|
||||||
|
|
||||||
|
/** \brief Respawn (reschedule) an executing task to be called again
|
||||||
|
* after all dependences have completed.
|
||||||
|
*/
|
||||||
|
template< class FunctorType >
|
||||||
|
void respawn( FunctorType * ) const ;
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** \brief Create and spawn a single-thread task */
|
||||||
|
template< class ExecSpace , class FunctorType >
|
||||||
|
inline
|
||||||
|
Future< typename FunctorType::value_type , ExecSpace >
|
||||||
|
spawn( TaskPolicy<ExecSpace> & policy , const FunctorType & functor )
|
||||||
|
{ return policy.spawn( policy.create( functor ) ); }
|
||||||
|
|
||||||
|
/** \brief Create and spawn a single-thread task with dependences */
|
||||||
|
template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
inline
|
||||||
|
Future< typename FunctorType::value_type , ExecSpace >
|
||||||
|
spawn( TaskPolicy<ExecSpace> & policy
|
||||||
|
, const FunctorType & functor
|
||||||
|
, const Future<Arg0,Arg1> & before_0
|
||||||
|
, const Future<Arg0,Arg1> & before_1 )
|
||||||
|
{
|
||||||
|
Future< typename FunctorType::value_type , ExecSpace > f ;
|
||||||
|
f = policy.create( functor , 2 );
|
||||||
|
policy.add_dependence( f , before_0 );
|
||||||
|
policy.add_dependence( f , before_1 );
|
||||||
|
policy.spawn( f );
|
||||||
|
return f ;
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** \brief Create and spawn a parallel_for task */
|
||||||
|
template< class ExecSpace , class ParallelPolicyType , class FunctorType >
|
||||||
|
inline
|
||||||
|
Future< typename FunctorType::value_type , ExecSpace >
|
||||||
|
spawn_foreach( TaskPolicy<ExecSpace> & task_policy
|
||||||
|
, const ParallelPolicyType & parallel_policy
|
||||||
|
, const FunctorType & functor )
|
||||||
|
{ return task_policy.spawn( task_policy.create_foreach( parallel_policy , functor ) ); }
|
||||||
|
|
||||||
|
/** \brief Create and spawn a parallel_reduce task */
|
||||||
|
template< class ExecSpace , class ParallelPolicyType , class FunctorType >
|
||||||
|
inline
|
||||||
|
Future< typename FunctorType::value_type , ExecSpace >
|
||||||
|
spawn_reduce( TaskPolicy<ExecSpace> & task_policy
|
||||||
|
, const ParallelPolicyType & parallel_policy
|
||||||
|
, const FunctorType & functor )
|
||||||
|
{ return task_policy.spawn( task_policy.create_reduce( parallel_policy , functor ) ); }
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** \brief Respawn a task functor with dependences */
|
||||||
|
template< class ExecSpace , class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
inline
|
||||||
|
void respawn( TaskPolicy<ExecSpace> & policy
|
||||||
|
, FunctorType * functor
|
||||||
|
, const Future<Arg0,Arg1> & before_0
|
||||||
|
, const Future<Arg0,Arg1> & before_1
|
||||||
|
)
|
||||||
|
{
|
||||||
|
policy.clear_dependence( functor );
|
||||||
|
policy.add_dependence( functor , before_0 );
|
||||||
|
policy.add_dependence( functor , before_1 );
|
||||||
|
policy.respawn( functor );
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class ExecSpace >
|
||||||
|
void wait( TaskPolicy< ExecSpace > & );
|
||||||
|
|
||||||
|
} /* namespace Experimental */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #define KOKKOS_TASKPOLICY_HPP */
|
||||||
|
|
||||||
217
lib/kokkos/core/src/Kokkos_Threads.hpp
Executable file
217
lib/kokkos/core/src/Kokkos_Threads.hpp
Executable file
@ -0,0 +1,217 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_THREADS_HPP
|
||||||
|
#define KOKKOS_THREADS_HPP
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <Kokkos_HostSpace.hpp>
|
||||||
|
#include <Kokkos_ScratchSpace.hpp>
|
||||||
|
#include <Kokkos_Layout.hpp>
|
||||||
|
#include <Kokkos_MemoryTraits.hpp>
|
||||||
|
#include <impl/Kokkos_Tags.hpp>
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
// Forward declaration of the implementation class Kokkos::Impl::ThreadsExec.
namespace Kokkos { namespace Impl { class ThreadsExec ; } }
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Execution space for a pool of Pthreads or C11 threads on a CPU. */
|
||||||
|
class Threads {
|
||||||
|
public:
|
||||||
|
//! \name Type declarations that all Kokkos devices must provide.
|
||||||
|
//@{
|
||||||
|
//! Tag this class as a kokkos execution space
|
||||||
|
typedef Threads execution_space ;
|
||||||
|
typedef Kokkos::HostSpace memory_space ;
|
||||||
|
|
||||||
|
//! This execution space preferred device_type
|
||||||
|
typedef Kokkos::Device<execution_space,memory_space> device_type;
|
||||||
|
|
||||||
|
typedef Kokkos::LayoutRight array_layout ;
|
||||||
|
typedef memory_space::size_type size_type ;
|
||||||
|
|
||||||
|
typedef ScratchMemorySpace< Threads > scratch_memory_space ;
|
||||||
|
|
||||||
|
|
||||||
|
//@}
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
//! \name Static functions that all Kokkos devices must implement.
|
||||||
|
//@{
|
||||||
|
|
||||||
|
/// \brief True if and only if this method is being called in a
|
||||||
|
/// thread-parallel function.
|
||||||
|
static int in_parallel();
|
||||||
|
|
||||||
|
/** \brief Set the device in a "sleep" state.
|
||||||
|
*
|
||||||
|
* This function sets the device in a "sleep" state in which it is
|
||||||
|
* not ready for work. This may consume less resources than if the
|
||||||
|
* device were in an "awake" state, but it may also take time to
|
||||||
|
* bring the device from a sleep state to be ready for work.
|
||||||
|
*
|
||||||
|
* \return True if the device is in the "sleep" state, else false if
|
||||||
|
* the device is actively working and could not enter the "sleep"
|
||||||
|
* state.
|
||||||
|
*/
|
||||||
|
static bool sleep();
|
||||||
|
|
||||||
|
/// \brief Wake the device from the 'sleep' state so it is ready for work.
|
||||||
|
///
|
||||||
|
/// \return True if the device is in the "ready" state, else "false"
|
||||||
|
/// if the device is actively working (which also means that it's
|
||||||
|
/// awake).
|
||||||
|
static bool wake();
|
||||||
|
|
||||||
|
/// \brief Wait until all dispatched functors complete.
|
||||||
|
///
|
||||||
|
/// The parallel_for or parallel_reduce dispatch of a functor may
|
||||||
|
/// return asynchronously, before the functor completes. This
|
||||||
|
/// method does not return until all dispatched functors on this
|
||||||
|
/// device have completed.
|
||||||
|
static void fence();
|
||||||
|
|
||||||
|
/// \brief Free any resources being consumed by the device.
|
||||||
|
///
|
||||||
|
/// For the Threads device, this terminates spawned worker threads.
|
||||||
|
static void finalize();
|
||||||
|
|
||||||
|
/// \brief Print configuration information to the given output stream.
|
||||||
|
static void print_configuration( std::ostream & , const bool detail = false );
|
||||||
|
|
||||||
|
//@}
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
/*------------------------------------------------------------------------*/
|
||||||
|
//! \name Space-specific functions
|
||||||
|
//@{
|
||||||
|
|
||||||
|
/** \brief Initialize the device in the "ready to work" state.
|
||||||
|
*
|
||||||
|
* The device is initialized in a "ready to work" or "awake" state.
|
||||||
|
* This state reduces latency and thus improves performance when
|
||||||
|
* dispatching work. However, the "awake" state consumes resources
|
||||||
|
* even when no work is being done. You may call sleep() to put
|
||||||
|
* the device in a "sleeping" state that does not consume as many
|
||||||
|
* resources, but it will take time (latency) to awaken the device
|
||||||
|
* again (via the wake()) method so that it is ready for work.
|
||||||
|
*
|
||||||
|
* Teams of threads are distributed as evenly as possible across
|
||||||
|
* the requested number of numa regions and cores per numa region.
|
||||||
|
* A team will not be split across a numa region.
|
||||||
|
*
|
||||||
|
* If the 'use_' arguments are not supplied the hwloc is queried
|
||||||
|
* to use all available cores.
|
||||||
|
*/
|
||||||
|
static void initialize( unsigned threads_count = 0 ,
|
||||||
|
unsigned use_numa_count = 0 ,
|
||||||
|
unsigned use_cores_per_numa = 0 ,
|
||||||
|
bool allow_asynchronous_threadpool = false );
|
||||||
|
|
||||||
|
static int is_initialized();
|
||||||
|
|
||||||
|
static Threads & instance( int = 0 );
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
static int thread_pool_size( int depth = 0 );
|
||||||
|
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
static int thread_pool_rank();
|
||||||
|
#else
|
||||||
|
KOKKOS_INLINE_FUNCTION static int thread_pool_rank() { return 0 ; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
inline static unsigned max_hardware_threads() { return thread_pool_size(0); }
|
||||||
|
KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); }
|
||||||
|
|
||||||
|
//@}
|
||||||
|
//----------------------------------------
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct VerifyExecutionCanAccessMemorySpace
|
||||||
|
< Kokkos::Threads::memory_space
|
||||||
|
, Kokkos::Threads::scratch_memory_space
|
||||||
|
>
|
||||||
|
{
|
||||||
|
enum { value = true };
|
||||||
|
inline static void verify( void ) { }
|
||||||
|
inline static void verify( const void * ) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#include <Kokkos_ExecPolicy.hpp>
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <Threads/Kokkos_ThreadsExec.hpp>
|
||||||
|
#include <Threads/Kokkos_ThreadsTeam.hpp>
|
||||||
|
#include <Threads/Kokkos_Threads_Parallel.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||||
|
#endif /* #define KOKKOS_THREADS_HPP */
|
||||||
|
|
||||||
|
|
||||||
53
lib/kokkos/core/src/Kokkos_Vectorization.hpp
Executable file
53
lib/kokkos/core/src/Kokkos_Vectorization.hpp
Executable file
@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// \file Kokkos_Vectorization.hpp
|
||||||
|
/// \brief Declaration and definition of Kokkos::Vectorization interface.
|
||||||
|
#ifndef KOKKOS_VECTORIZATION_HPP
|
||||||
|
#define KOKKOS_VECTORIZATION_HPP
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CUDA )
|
||||||
|
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
1915
lib/kokkos/core/src/Kokkos_View.hpp
Executable file
1915
lib/kokkos/core/src/Kokkos_View.hpp
Executable file
File diff suppressed because it is too large
Load Diff
140
lib/kokkos/core/src/Kokkos_hwloc.hpp
Executable file
140
lib/kokkos/core/src/Kokkos_hwloc.hpp
Executable file
@ -0,0 +1,140 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_HWLOC_HPP
|
||||||
|
#define KOKKOS_HWLOC_HPP
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace Kokkos {

/** \brief  Minimal subset of logical 'hwloc' functionality available
 *          from http://www.open-mpi.org/projects/hwloc/.
 *
 *  These calls are NOT thread safe.  This is deliberate: it avoids
 *  mutexes, memory allocations, and any other action that could give
 *  the runtime system an opportunity to migrate the threads or touch
 *  allocated memory during the function calls.
 *
 *  Call these functions only from a thread that has guaranteed
 *  exclusive access; e.g., for OpenMP, from within a 'critical' region.
 */
namespace hwloc {

/** \brief  Query if hwloc is available. */
bool available();

/** \brief  Query the number of available NUMA regions.
 *
 *  This will be less than the hardware capacity
 *  if the MPI process is pinned to a NUMA region.
 */
unsigned get_available_numa_count();

/** \brief  Query the number of available cores per NUMA region.
 *
 *  This will be less than the hardware capacity
 *  if the MPI process is pinned to a set of cores.
 */
unsigned get_available_cores_per_numa();

/** \brief  Query the number of available "hard" threads per core;
 *          i.e., hyperthreads.
 */
unsigned get_available_threads_per_core();

} /* namespace hwloc */
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
// Internal functions for binding persistent spawned threads.
|
||||||
|
|
||||||
|
namespace Kokkos {
namespace hwloc {

/** \brief  Recommend a mapping of threads onto cores.
 *
 *  Each of thread_count, use_numa_count and use_cores_per_numa is an
 *  in/out argument: when it is passed as 0 a value is chosen and
 *  written back.
 *
 *  \return 0 if asynchronous;
 *          1 if synchronous and threads_coord[0] is the process core.
 */
unsigned thread_mapping( const char * const             label ,
                         const bool                     allow_async ,
                         unsigned &                     thread_count ,
                         unsigned &                     use_numa_count ,
                         unsigned &                     use_cores_per_numa ,
                         std::pair<unsigned,unsigned>   threads_coord[] );

/** \brief  Query the core-coordinate of the current thread
 *          with respect to the core_topology.
 *
 *  As long as the thread is running within the process binding,
 *  the following conditions hold:
 *
 *    core_coordinate.first  < core_topology.first
 *    core_coordinate.second < core_topology.second
 */
std::pair<unsigned,unsigned> get_this_thread_coordinate();

/** \brief  Bind the current thread to a core. */
bool bind_this_thread( const std::pair<unsigned,unsigned> );

/** \brief  Bind the current thread to one of the cores in the list.
 *
 *  The chosen entry is set to (~0,~0) and its index is returned.
 *  If binding fails, ~0 is returned.
 */
unsigned bind_this_thread( const unsigned                 coordinate_count ,
                           std::pair<unsigned,unsigned>   coordinate[] );

/** \brief  Unbind the current thread back to the original process binding. */
bool unbind_this_thread();

} /* namespace hwloc */
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #define KOKKOS_HWLOC_HPP */
|
||||||
|
|
||||||
118
lib/kokkos/core/src/Makefile
Executable file
118
lib/kokkos/core/src/Makefile
Executable file
@ -0,0 +1,118 @@
|
|||||||
|
# Builds the Kokkos library (libkokkos.a), generates a relocated
# Makefile.kokkos that points at $(PREFIX), and installs headers,
# library and that Makefile.kokkos under $(PREFIX).

KOKKOS_PATH = ../..

PREFIX ?= /usr/local/lib/kokkos

# Kept as the first rule so that a plain 'make' builds the library.
default: messages build-lib
	echo "End Build"

# None of these targets creates a file named after itself.  Declaring them
# phony keeps a stray file or directory called e.g. 'mkdir', 'install' or
# 'clean' from silently disabling the rule.
.PHONY: default messages build-makefile-kokkos build-lib mkdir \
        copy-cuda copy-threads copy-openmp install clean

include $(KOKKOS_PATH)/Makefile.kokkos

# The CUDA build must go through nvcc_wrapper, so CXX and LINK are forced
# there; otherwise they are only defaults the caller may override.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = nvcc_wrapper
  CXXFLAGS ?= -O3
  LINK = nvcc_wrapper
  LINKFLAGS ?=
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= g++
  LINKFLAGS ?=
endif

PWD = $(shell pwd)

# Headers installed into $(PREFIX)/include and $(PREFIX)/include/impl.
KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)

# Per-backend header copies; each enabled backend appends its copy target.
CONDITIONAL_COPIES =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
  CONDITIONAL_COPIES += copy-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
  CONDITIONAL_COPIES += copy-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
  CONDITIONAL_COPIES += copy-openmp
endif

messages:
	echo "Start Build"

# Writes ./Makefile.kokkos recording the settings used for this build, then
# rewrites source-tree paths so that they refer to the install tree.
build-makefile-kokkos:
	rm -f Makefile.kokkos
	echo "#Global Settings used to generate this library" >> Makefile.kokkos
	echo "KOKKOS_PATH = $(PREFIX)" >> Makefile.kokkos
	echo "KOKKOS_DEVICES = $(KOKKOS_DEVICES)" >> Makefile.kokkos
	echo "KOKKOS_ARCH = $(KOKKOS_ARCH)" >> Makefile.kokkos
	echo "KOKKOS_DEBUG = $(KOKKOS_DEBUG)" >> Makefile.kokkos
	echo "KOKKOS_USE_TPLS = $(KOKKOS_USE_TPLS)" >> Makefile.kokkos
	echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos
	echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos
	echo "CXX ?= $(CXX)" >> Makefile.kokkos
	echo "" >> Makefile.kokkos
	echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos
	echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos
	echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos
	echo "" >> Makefile.kokkos
	echo "#Variables used in application Makefiles" >> Makefile.kokkos
	echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos
	echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos
	echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos
	echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos
	echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos
	echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos
	sed \
		-e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \
		-e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \
		-e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \
		-e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \
		-e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \
		-e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' Makefile.kokkos \
		> Makefile.kokkos.tmp
	mv -f Makefile.kokkos.tmp Makefile.kokkos

build-lib: build-makefile-kokkos $(KOKKOS_LINK_DEPENDS)

# Creates the install directory layout.
mkdir:
	mkdir -p $(PREFIX)
	mkdir -p $(PREFIX)/include
	mkdir -p $(PREFIX)/lib
	mkdir -p $(PREFIX)/include/impl

copy-cuda: mkdir
	mkdir -p $(PREFIX)/include/Cuda
	cp $(KOKKOS_HEADERS_CUDA) $(PREFIX)/include/Cuda

copy-threads: mkdir
	mkdir -p $(PREFIX)/include/Threads
	cp $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads

copy-openmp: mkdir
	mkdir -p $(PREFIX)/include/OpenMP
	cp $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP

install: mkdir $(CONDITIONAL_COPIES) build-lib
	cp $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include
	cp $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl
	cp Makefile.kokkos $(PREFIX)
	cp libkokkos.a $(PREFIX)/lib
	cp KokkosCore_config.h $(PREFIX)/include

# -f: 'make clean' must also succeed when Makefile.kokkos was never generated.
clean: kokkos-clean
	rm -f Makefile.kokkos
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
496
lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
Executable file
496
lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp
Executable file
@ -0,0 +1,496 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_OPENMP_PARALLEL_HPP
|
||||||
|
#define KOKKOS_OPENMP_PARALLEL_HPP
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
#include <OpenMP/Kokkos_OpenMPexec.hpp>
|
||||||
|
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type work_end = range.end();
|
||||||
|
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
|
||||||
|
functor( iwork );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type work_end = range.end();
|
||||||
|
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
|
||||||
|
functor( typename PType::work_tag() , iwork );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
inline
|
||||||
|
ParallelFor( const FunctorType & functor
|
||||||
|
, const Policy & policy )
|
||||||
|
{
|
||||||
|
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for");
|
||||||
|
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for");
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
|
||||||
|
driver( functor , typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() ) );
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
|
||||||
|
typedef typename Policy::work_tag WorkTag ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type work_end = range.end();
|
||||||
|
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
|
||||||
|
functor( iwork , update );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type work_end = range.end();
|
||||||
|
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
|
||||||
|
functor( typename PType::work_tag() , iwork , update );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
template< class ViewType >
|
||||||
|
inline
|
||||||
|
ParallelReduce( typename Impl::enable_if<
|
||||||
|
( Impl::is_view< ViewType >::value &&
|
||||||
|
Impl::is_same< typename ViewType::memory_space , HostSpace >::value
|
||||||
|
), const FunctorType & >::type functor
|
||||||
|
, const Policy & policy
|
||||||
|
, const ViewType & result_view )
|
||||||
|
{
|
||||||
|
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
|
||||||
|
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce");
|
||||||
|
|
||||||
|
OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , 0 );
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
|
||||||
|
|
||||||
|
driver( functor
|
||||||
|
, ValueInit::init( functor , exec.scratch_reduce() )
|
||||||
|
, typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
|
||||||
|
);
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
|
||||||
|
{
|
||||||
|
const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
|
||||||
|
|
||||||
|
for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
|
||||||
|
ValueJoin::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
|
||||||
|
|
||||||
|
if ( result_view.ptr_on_device() ) {
|
||||||
|
const int n = ValueTraits::value_count( functor );
|
||||||
|
|
||||||
|
for ( int j = 0 ; j < n ; ++j ) { result_view.ptr_on_device()[j] = ptr[j] ; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::OpenMP > Policy ;
|
||||||
|
typedef typename Policy::work_tag WorkTag ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueOps< FunctorType , WorkTag > ValueOps ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const PType & range
|
||||||
|
, const bool final )
|
||||||
|
{
|
||||||
|
const typename PType::member_type work_end = range.end();
|
||||||
|
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
|
||||||
|
functor( iwork , update , final );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const PType & range
|
||||||
|
, const bool final )
|
||||||
|
{
|
||||||
|
const typename PType::member_type work_end = range.end();
|
||||||
|
for ( typename PType::member_type iwork = range.begin() ; iwork < work_end ; ++iwork ) {
|
||||||
|
functor( typename PType::work_tag() , iwork , update , final );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
inline
|
||||||
|
ParallelScan( const FunctorType & functor
|
||||||
|
, const Policy & policy )
|
||||||
|
{
|
||||||
|
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan");
|
||||||
|
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan");
|
||||||
|
|
||||||
|
OpenMPexec::resize_scratch( 2 * ValueTraits::value_size( functor ) , 0 );
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
|
||||||
|
|
||||||
|
driver( functor
|
||||||
|
, ValueInit::init( functor , pointer_type( exec.scratch_reduce() ) + ValueTraits::value_count( functor ) )
|
||||||
|
, typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
|
||||||
|
, false );
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
|
||||||
|
{
|
||||||
|
const unsigned thread_count = OpenMPexec::pool_size();
|
||||||
|
const unsigned value_count = ValueTraits::value_count( functor );
|
||||||
|
|
||||||
|
pointer_type ptr_prev = 0 ;
|
||||||
|
|
||||||
|
for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) {
|
||||||
|
|
||||||
|
pointer_type ptr = pointer_type( OpenMPexec::pool_rev(rank_rev)->scratch_reduce() );
|
||||||
|
|
||||||
|
if ( ptr_prev ) {
|
||||||
|
for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; }
|
||||||
|
ValueJoin::join( functor , ptr + value_count , ptr );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ValueInit::init( functor , ptr );
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr_prev = ptr ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
|
||||||
|
|
||||||
|
driver( functor
|
||||||
|
, ValueOps::reference( pointer_type( exec.scratch_reduce() ) )
|
||||||
|
, typename Policy::WorkRange( policy , exec.pool_rank() , exec.pool_size() )
|
||||||
|
, true );
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
class ParallelFor< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > Policy ;
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename Policy::member_type & member )
|
||||||
|
{ functor( member ); }
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename Policy::member_type & member )
|
||||||
|
{ functor( TagType() , member ); }
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
inline
|
||||||
|
ParallelFor( const FunctorType & functor ,
|
||||||
|
const Policy & policy )
|
||||||
|
{
|
||||||
|
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for");
|
||||||
|
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for");
|
||||||
|
|
||||||
|
const size_t team_reduce_size = Policy::member_type::team_reduce_size();
|
||||||
|
const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
|
||||||
|
|
||||||
|
OpenMPexec::resize_scratch( 0 , team_reduce_size + team_shmem_size );
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
typename Policy::member_type member( * OpenMPexec::get_thread_omp() , policy , team_shmem_size );
|
||||||
|
|
||||||
|
for ( ; member.valid() ; member.next() ) {
|
||||||
|
ParallelFor::template driver< typename Policy::work_tag >( functor , member );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
}
|
||||||
|
|
||||||
|
void wait() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > Policy ;
|
||||||
|
typedef typename Policy::work_tag WorkTag ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag > ValueJoin ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename PType::member_type & member
|
||||||
|
, reference_type update )
|
||||||
|
{ functor( member , update ); }
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< typename PType::work_tag , void >::value ,
|
||||||
|
const FunctorType & >::type functor
|
||||||
|
, const typename PType::member_type & member
|
||||||
|
, reference_type update )
|
||||||
|
{ functor( typename PType::work_tag() , member , update ); }
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
inline
|
||||||
|
ParallelReduce( const FunctorType & functor ,
|
||||||
|
const Policy & policy )
|
||||||
|
{
|
||||||
|
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
|
||||||
|
|
||||||
|
const size_t team_reduce_size = Policy::member_type::team_reduce_size();
|
||||||
|
const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
|
||||||
|
|
||||||
|
OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , team_reduce_size + team_shmem_size );
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
|
||||||
|
|
||||||
|
reference_type update = ValueInit::init( functor , exec.scratch_reduce() );
|
||||||
|
|
||||||
|
for ( typename Policy::member_type member( exec , policy , team_shmem_size ); member.valid() ; member.next() ) {
|
||||||
|
ParallelReduce::template driver< Policy >( functor , member , update );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , WorkTag , reference_type > Join ;
|
||||||
|
|
||||||
|
const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
|
||||||
|
|
||||||
|
for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
|
||||||
|
Join::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class ViewType >
|
||||||
|
inline
|
||||||
|
ParallelReduce( const FunctorType & functor ,
|
||||||
|
const Policy & policy ,
|
||||||
|
const ViewType & result )
|
||||||
|
{
|
||||||
|
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce");
|
||||||
|
|
||||||
|
const size_t team_reduce_size = Policy::member_type::team_reduce_size();
|
||||||
|
const size_t team_shmem_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
|
||||||
|
|
||||||
|
OpenMPexec::resize_scratch( ValueTraits::value_size( functor ) , team_reduce_size + team_shmem_size );
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
OpenMPexec & exec = * OpenMPexec::get_thread_omp();
|
||||||
|
|
||||||
|
reference_type update = ValueInit::init( functor , exec.scratch_reduce() );
|
||||||
|
|
||||||
|
for ( typename Policy::member_type member( exec , policy , team_shmem_size ); member.valid() ; member.next() ) {
|
||||||
|
ParallelReduce::template driver< Policy >( functor , member , update );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
|
||||||
|
{
|
||||||
|
const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() );
|
||||||
|
|
||||||
|
for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) {
|
||||||
|
ValueJoin::join( functor , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() );
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , ptr );
|
||||||
|
|
||||||
|
const int n = ValueTraits::value_count( functor );
|
||||||
|
|
||||||
|
for ( int j = 0 ; j < n ; ++j ) { result.ptr_on_device()[j] = ptr[j] ; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void wait() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* KOKKOS_OPENMP_PARALLEL_HPP */
|
||||||
|
|
||||||
364
lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
Executable file
364
lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp
Executable file
@ -0,0 +1,364 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <limits>
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_OPENMP
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
int kokkos_omp_in_parallel();
|
||||||
|
|
||||||
|
int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 );
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
int kokkos_omp_in_parallel()
|
||||||
|
{
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
return omp_in_parallel() && ! kokkos_omp_in_critical_region ;
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
bool s_using_hwloc = false;
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
int OpenMPexec::m_map_rank[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 };
|
||||||
|
|
||||||
|
int OpenMPexec::m_pool_topo[ 4 ] = { 0 };
|
||||||
|
|
||||||
|
OpenMPexec::Pool OpenMPexec::m_pool;
|
||||||
|
|
||||||
|
void OpenMPexec::verify_is_process( const char * const label )
|
||||||
|
{
|
||||||
|
if ( omp_in_parallel() ) {
|
||||||
|
std::string msg( label );
|
||||||
|
msg.append( " ERROR: in parallel" );
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void OpenMPexec::verify_initialized( const char * const label )
|
||||||
|
{
|
||||||
|
if ( 0 == m_pool[0] ) {
|
||||||
|
std::string msg( label );
|
||||||
|
msg.append( " ERROR: not initialized" );
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void OpenMPexec::clear_scratch()
|
||||||
|
{
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
const int rank_rev = m_map_rank[ omp_get_thread_num() ];
|
||||||
|
m_pool.at(rank_rev).clear();
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
}
|
||||||
|
|
||||||
|
void OpenMPexec::resize_scratch( size_t reduce_size , size_t thread_size )
|
||||||
|
{
|
||||||
|
enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 };
|
||||||
|
enum { ALLOC_EXEC = ( sizeof(OpenMPexec) + ALIGN_MASK ) & ~ALIGN_MASK };
|
||||||
|
|
||||||
|
const size_t old_reduce_size = m_pool[0] ? m_pool[0]->m_scratch_reduce_end : 0 ;
|
||||||
|
const size_t old_thread_size = m_pool[0] ? m_pool[0]->m_scratch_thread_end - m_pool[0]->m_scratch_reduce_end : 0 ;
|
||||||
|
|
||||||
|
reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ;
|
||||||
|
thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ;
|
||||||
|
|
||||||
|
// Requesting allocation and old allocation is too small:
|
||||||
|
|
||||||
|
const bool allocate = ( old_reduce_size < reduce_size ) ||
|
||||||
|
( old_thread_size < thread_size );
|
||||||
|
|
||||||
|
if ( allocate ) {
|
||||||
|
if ( reduce_size < old_reduce_size ) { reduce_size = old_reduce_size ; }
|
||||||
|
if ( thread_size < old_thread_size ) { thread_size = old_thread_size ; }
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t alloc_size = allocate ? ALLOC_EXEC + reduce_size + thread_size : 0 ;
|
||||||
|
const int pool_size = m_pool_topo[0] ;
|
||||||
|
|
||||||
|
if ( allocate ) {
|
||||||
|
|
||||||
|
clear_scratch();
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
const int rank_rev = m_map_rank[ omp_get_thread_num() ];
|
||||||
|
const int rank = pool_size - ( rank_rev + 1 );
|
||||||
|
|
||||||
|
m_pool.at(rank_rev) = HostSpace::allocate_and_track( "openmp_scratch", alloc_size );
|
||||||
|
new ( m_pool[ rank_rev ] ) OpenMPexec( rank , ALLOC_EXEC , reduce_size , thread_size );
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
int OpenMP::is_initialized()
|
||||||
|
{ return 0 != Impl::OpenMPexec::m_pool[0]; }
|
||||||
|
|
||||||
|
void OpenMP::initialize( unsigned thread_count ,
|
||||||
|
unsigned use_numa_count ,
|
||||||
|
unsigned use_cores_per_numa )
|
||||||
|
{
|
||||||
|
// Before any other call to OMP query the maximum number of threads
|
||||||
|
// and save the value for re-initialization unit testing.
|
||||||
|
|
||||||
|
//Using omp_get_max_threads(); is problematic in conjunction with
|
||||||
|
//Hwloc on Intel (essentially an initial call to the OpenMP runtime
|
||||||
|
//without a parallel region before will set a process mask for a single core
|
||||||
|
//The runtime will than bind threads for a parallel region to other cores on the
|
||||||
|
//entering the first parallel region and make the process mask the aggregate of
|
||||||
|
//the thread masks. The intend seems to be to make serial code run fast, if you
|
||||||
|
//compile with OpenMP enabled but don't actually use parallel regions or so
|
||||||
|
//static int omp_max_threads = omp_get_max_threads();
|
||||||
|
int nthreads = 0;
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp atomic
|
||||||
|
nthreads++;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int omp_max_threads = nthreads;
|
||||||
|
|
||||||
|
const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ;
|
||||||
|
|
||||||
|
bool thread_spawn_failed = false ;
|
||||||
|
|
||||||
|
if ( ! is_initialized ) {
|
||||||
|
|
||||||
|
// Use hwloc thread pinning if concerned with locality.
|
||||||
|
// If spreading threads across multiple NUMA regions.
|
||||||
|
// If hyperthreading is enabled.
|
||||||
|
Impl::s_using_hwloc = hwloc::available() && (
|
||||||
|
( 1 < Kokkos::hwloc::get_available_numa_count() ) ||
|
||||||
|
( 1 < Kokkos::hwloc::get_available_threads_per_core() ) );
|
||||||
|
|
||||||
|
std::pair<unsigned,unsigned> threads_coord[ Impl::OpenMPexec::MAX_THREAD_COUNT ];
|
||||||
|
|
||||||
|
// If hwloc available then use it's maximum value.
|
||||||
|
|
||||||
|
if ( thread_count == 0 ) {
|
||||||
|
thread_count = Impl::s_using_hwloc
|
||||||
|
? Kokkos::hwloc::get_available_numa_count() *
|
||||||
|
Kokkos::hwloc::get_available_cores_per_numa() *
|
||||||
|
Kokkos::hwloc::get_available_threads_per_core()
|
||||||
|
: omp_max_threads ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(Impl::s_using_hwloc)
|
||||||
|
hwloc::thread_mapping( "Kokkos::OpenMP::initialize" ,
|
||||||
|
false /* do not allow asynchronous */ ,
|
||||||
|
thread_count ,
|
||||||
|
use_numa_count ,
|
||||||
|
use_cores_per_numa ,
|
||||||
|
threads_coord );
|
||||||
|
|
||||||
|
// Spawn threads:
|
||||||
|
|
||||||
|
omp_set_num_threads( thread_count );
|
||||||
|
|
||||||
|
// Verify OMP interaction:
|
||||||
|
if ( int(thread_count) != omp_get_max_threads() ) {
|
||||||
|
thread_spawn_failed = true ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify spawning and bind threads:
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp critical
|
||||||
|
{
|
||||||
|
if ( int(thread_count) != omp_get_num_threads() ) {
|
||||||
|
thread_spawn_failed = true ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region.
|
||||||
|
// Call to 'new' may not be thread safe as well.
|
||||||
|
|
||||||
|
// Reverse the rank for threads so that the scan operation reduces to the highest rank thread.
|
||||||
|
|
||||||
|
const unsigned omp_rank = omp_get_thread_num();
|
||||||
|
const unsigned thread_r = Impl::s_using_hwloc ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) : omp_rank ;
|
||||||
|
|
||||||
|
Impl::OpenMPexec::m_map_rank[ omp_rank ] = thread_r ;
|
||||||
|
}
|
||||||
|
/* END #pragma omp critical */
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
|
||||||
|
if ( ! thread_spawn_failed ) {
|
||||||
|
Impl::OpenMPexec::m_pool_topo[0] = thread_count ;
|
||||||
|
Impl::OpenMPexec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count;
|
||||||
|
Impl::OpenMPexec::m_pool_topo[2] = Impl::s_using_hwloc ? thread_count / ( use_numa_count * use_cores_per_numa ) : 1;
|
||||||
|
|
||||||
|
Impl::OpenMPexec::resize_scratch( 1024 , 1024 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( is_initialized || thread_spawn_failed ) {
|
||||||
|
std::string msg("Kokkos::OpenMP::initialize ERROR");
|
||||||
|
|
||||||
|
if ( is_initialized ) { msg.append(" : already initialized"); }
|
||||||
|
if ( thread_spawn_failed ) { msg.append(" : failed spawning threads"); }
|
||||||
|
|
||||||
|
Kokkos::Impl::throw_runtime_exception(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init the array for used for arbitrarily sized atomics
|
||||||
|
Impl::init_lock_array_host_space();
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
void OpenMP::finalize()
|
||||||
|
{
|
||||||
|
Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" );
|
||||||
|
Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" );
|
||||||
|
|
||||||
|
Impl::OpenMPexec::clear_scratch();
|
||||||
|
|
||||||
|
Impl::OpenMPexec::m_pool_topo[0] = 0 ;
|
||||||
|
Impl::OpenMPexec::m_pool_topo[1] = 0 ;
|
||||||
|
Impl::OpenMPexec::m_pool_topo[2] = 0 ;
|
||||||
|
|
||||||
|
omp_set_num_threads(1);
|
||||||
|
|
||||||
|
if ( Impl::s_using_hwloc ) {
|
||||||
|
hwloc::unbind_this_thread();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
void OpenMP::print_configuration( std::ostream & s , const bool detail )
|
||||||
|
{
|
||||||
|
Impl::OpenMPexec::verify_is_process( "OpenMP::print_configuration" );
|
||||||
|
|
||||||
|
s << "Kokkos::OpenMP" ;
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_OPENMP )
|
||||||
|
s << " KOKKOS_HAVE_OPENMP" ;
|
||||||
|
#endif
|
||||||
|
#if defined( KOKKOS_HAVE_HWLOC )
|
||||||
|
|
||||||
|
const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count();
|
||||||
|
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
|
||||||
|
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
|
||||||
|
|
||||||
|
s << " hwloc[" << numa_count_ << "x" << cores_per_numa << "x" << threads_per_core << "]"
|
||||||
|
<< " hwloc_binding_" << ( Impl::s_using_hwloc ? "enabled" : "disabled" )
|
||||||
|
;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ;
|
||||||
|
|
||||||
|
if ( is_initialized ) {
|
||||||
|
const int numa_count = Kokkos::Impl::OpenMPexec::m_pool_topo[0] / Kokkos::Impl::OpenMPexec::m_pool_topo[1] ;
|
||||||
|
const int core_per_numa = Kokkos::Impl::OpenMPexec::m_pool_topo[1] / Kokkos::Impl::OpenMPexec::m_pool_topo[2] ;
|
||||||
|
const int thread_per_core = Kokkos::Impl::OpenMPexec::m_pool_topo[2] ;
|
||||||
|
|
||||||
|
s << " thread_pool_topology[ " << numa_count
|
||||||
|
<< " x " << core_per_numa
|
||||||
|
<< " x " << thread_per_core
|
||||||
|
<< " ]"
|
||||||
|
<< std::endl ;
|
||||||
|
|
||||||
|
if ( detail ) {
|
||||||
|
std::vector< std::pair<unsigned,unsigned> > coord( Kokkos::Impl::OpenMPexec::m_pool_topo[0] );
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp critical
|
||||||
|
{
|
||||||
|
coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate();
|
||||||
|
}
|
||||||
|
/* END #pragma omp critical */
|
||||||
|
}
|
||||||
|
/* END #pragma omp parallel */
|
||||||
|
|
||||||
|
for ( unsigned i = 0 ; i < coord.size() ; ++i ) {
|
||||||
|
s << " thread omp_rank[" << i << "]"
|
||||||
|
<< " kokkos_rank[" << Impl::OpenMPexec::m_map_rank[ i ] << "]"
|
||||||
|
<< " hwloc_coord[" << coord[i].first << "." << coord[i].second << "]"
|
||||||
|
<< std::endl ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
s << " not initialized" << std::endl ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#endif //KOKKOS_HAVE_OPENMP
|
||||||
767
lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
Executable file
767
lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp
Executable file
@ -0,0 +1,767 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_OPENMPEXEC_HPP
|
||||||
|
#define KOKKOS_OPENMPEXEC_HPP
|
||||||
|
|
||||||
|
#include <impl/Kokkos_Traits.hpp>
|
||||||
|
#include <impl/Kokkos_spinwait.hpp>
|
||||||
|
#include <impl/Kokkos_AllocationTracker.hpp>
|
||||||
|
|
||||||
|
#include <Kokkos_Atomic.hpp>
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
/** \brief Data for OpenMP thread execution */
|
||||||
|
|
||||||
|
class OpenMPexec {
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum { MAX_THREAD_COUNT = 4096 };
|
||||||
|
|
||||||
|
struct Pool
|
||||||
|
{
|
||||||
|
Pool() : m_trackers() {}
|
||||||
|
|
||||||
|
AllocationTracker m_trackers[ MAX_THREAD_COUNT ];
|
||||||
|
|
||||||
|
OpenMPexec * operator[](int i)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<OpenMPexec *>(m_trackers[i].alloc_ptr());
|
||||||
|
}
|
||||||
|
|
||||||
|
AllocationTracker & at(int i)
|
||||||
|
{
|
||||||
|
return m_trackers[i];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
static int m_pool_topo[ 4 ];
|
||||||
|
static int m_map_rank[ MAX_THREAD_COUNT ];
|
||||||
|
static Pool m_pool; // Indexed by: m_pool_rank_rev
|
||||||
|
|
||||||
|
friend class Kokkos::OpenMP ;
|
||||||
|
|
||||||
|
int const m_pool_rank ;
|
||||||
|
int const m_pool_rank_rev ;
|
||||||
|
int const m_scratch_exec_end ;
|
||||||
|
int const m_scratch_reduce_end ;
|
||||||
|
int const m_scratch_thread_end ;
|
||||||
|
|
||||||
|
int volatile m_barrier_state ;
|
||||||
|
|
||||||
|
OpenMPexec();
|
||||||
|
OpenMPexec( const OpenMPexec & );
|
||||||
|
OpenMPexec & operator = ( const OpenMPexec & );
|
||||||
|
|
||||||
|
static void clear_scratch();
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
// Topology of a cache coherent thread pool:
|
||||||
|
// TOTAL = NUMA x GRAIN
|
||||||
|
// pool_size( depth = 0 )
|
||||||
|
// pool_size(0) = total number of threads
|
||||||
|
// pool_size(1) = number of threads per NUMA
|
||||||
|
// pool_size(2) = number of threads sharing finest grain memory hierarchy
|
||||||
|
|
||||||
|
inline static
|
||||||
|
int pool_size( int depth = 0 ) { return m_pool_topo[ depth ]; }
|
||||||
|
|
||||||
|
inline static
|
||||||
|
OpenMPexec * pool_rev( int pool_rank_rev ) { return m_pool[ pool_rank_rev ]; }
|
||||||
|
|
||||||
|
inline int pool_rank() const { return m_pool_rank ; }
|
||||||
|
inline int pool_rank_rev() const { return m_pool_rank_rev ; }
|
||||||
|
|
||||||
|
inline void * scratch_reduce() const { return ((char *) this) + m_scratch_exec_end ; }
|
||||||
|
inline void * scratch_thread() const { return ((char *) this) + m_scratch_reduce_end ; }
|
||||||
|
|
||||||
|
inline
|
||||||
|
void state_wait( int state )
|
||||||
|
{ Impl::spinwait( m_barrier_state , state ); }
|
||||||
|
|
||||||
|
inline
|
||||||
|
void state_set( int state ) { m_barrier_state = state ; }
|
||||||
|
|
||||||
|
~OpenMPexec() {}
|
||||||
|
|
||||||
|
OpenMPexec( const int poolRank
|
||||||
|
, const int scratch_exec_size
|
||||||
|
, const int scratch_reduce_size
|
||||||
|
, const int scratch_thread_size )
|
||||||
|
: m_pool_rank( poolRank )
|
||||||
|
, m_pool_rank_rev( pool_size() - ( poolRank + 1 ) )
|
||||||
|
, m_scratch_exec_end( scratch_exec_size )
|
||||||
|
, m_scratch_reduce_end( m_scratch_exec_end + scratch_reduce_size )
|
||||||
|
, m_scratch_thread_end( m_scratch_reduce_end + scratch_thread_size )
|
||||||
|
, m_barrier_state(0)
|
||||||
|
{}
|
||||||
|
|
||||||
|
static void finalize();
|
||||||
|
|
||||||
|
static void initialize( const unsigned team_count ,
|
||||||
|
const unsigned threads_per_team ,
|
||||||
|
const unsigned numa_count ,
|
||||||
|
const unsigned cores_per_numa );
|
||||||
|
|
||||||
|
static void verify_is_process( const char * const );
|
||||||
|
static void verify_initialized( const char * const );
|
||||||
|
|
||||||
|
static void resize_scratch( size_t reduce_size , size_t thread_size );
|
||||||
|
|
||||||
|
inline static
|
||||||
|
OpenMPexec * get_thread_omp() { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
class OpenMPexecTeamMember {
|
||||||
|
private:
|
||||||
|
|
||||||
|
enum { TEAM_REDUCE_SIZE = 512 };
|
||||||
|
|
||||||
|
/** \brief Thread states for team synchronization */
|
||||||
|
enum { Active = 0 , Rendezvous = 1 };
|
||||||
|
|
||||||
|
typedef Kokkos::OpenMP execution_space ;
|
||||||
|
typedef execution_space::scratch_memory_space scratch_memory_space ;
|
||||||
|
|
||||||
|
Impl::OpenMPexec & m_exec ;
|
||||||
|
scratch_memory_space m_team_shared ;
|
||||||
|
int m_team_shmem ;
|
||||||
|
int m_team_base_rev ;
|
||||||
|
int m_team_rank_rev ;
|
||||||
|
int m_team_rank ;
|
||||||
|
int m_team_size ;
|
||||||
|
int m_league_rank ;
|
||||||
|
int m_league_end ;
|
||||||
|
int m_league_size ;
|
||||||
|
|
||||||
|
// Fan-in team threads, root of the fan-in which does not block returns true
|
||||||
|
inline
|
||||||
|
bool team_fan_in() const
|
||||||
|
{
|
||||||
|
for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {
|
||||||
|
m_exec.pool_rev( m_team_base_rev + j )->state_wait( Active );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( m_team_rank_rev ) {
|
||||||
|
m_exec.state_set( Rendezvous );
|
||||||
|
m_exec.state_wait( Rendezvous );
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0 == m_team_rank_rev ;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void team_fan_out() const
|
||||||
|
{
|
||||||
|
for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) {
|
||||||
|
m_exec.pool_rev( m_team_base_rev + j )->state_set( Active );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
const execution_space::scratch_memory_space & team_shmem() const
|
||||||
|
{ return m_team_shared ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION void team_barrier() const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{}
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
if ( 1 < m_team_size ) {
|
||||||
|
team_fan_in();
|
||||||
|
team_fan_out();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void team_broadcast(ValueType& value, const int& thread_id) const
|
||||||
|
{
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ }
|
||||||
|
#else
|
||||||
|
// Make sure there is enough scratch space:
|
||||||
|
typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE
|
||||||
|
, ValueType , void >::type type ;
|
||||||
|
|
||||||
|
type * const local_value = ((type*) m_exec.scratch_thread());
|
||||||
|
if(team_rank() == thread_id)
|
||||||
|
*local_value = value;
|
||||||
|
memory_fence();
|
||||||
|
team_barrier();
|
||||||
|
value = *local_value;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_CXX11
|
||||||
|
template< class ValueType, class JoinOp >
|
||||||
|
KOKKOS_INLINE_FUNCTION ValueType
|
||||||
|
team_reduce( const ValueType & value
|
||||||
|
, const JoinOp & op_in ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return ValueType(); }
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
typedef ValueType value_type;
|
||||||
|
const JoinLambdaAdapter<value_type,JoinOp> op(op_in);
|
||||||
|
#endif
|
||||||
|
#else // KOKKOS_HAVE_CXX11
|
||||||
|
template< class JoinOp >
|
||||||
|
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
|
||||||
|
team_reduce( const typename JoinOp::value_type & value
|
||||||
|
, const JoinOp & op ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return typename JoinOp::value_type(); }
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
typedef typename JoinOp::value_type value_type;
|
||||||
|
#endif
|
||||||
|
#endif // KOKKOS_HAVE_CXX11
|
||||||
|
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
// Make sure there is enough scratch space:
|
||||||
|
typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
|
||||||
|
, value_type , void >::type type ;
|
||||||
|
|
||||||
|
type * const local_value = ((type*) m_exec.scratch_thread());
|
||||||
|
|
||||||
|
// Set this thread's contribution
|
||||||
|
*local_value = value ;
|
||||||
|
|
||||||
|
// Fence to make sure the base team member has access:
|
||||||
|
memory_fence();
|
||||||
|
|
||||||
|
if ( team_fan_in() ) {
|
||||||
|
// The last thread to synchronize returns true, all other threads wait for team_fan_out()
|
||||||
|
type * const team_value = ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread());
|
||||||
|
|
||||||
|
// Join to the team value:
|
||||||
|
for ( int i = 1 ; i < m_team_size ; ++i ) {
|
||||||
|
op.join( *team_value , *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) );
|
||||||
|
}
|
||||||
|
|
||||||
|
// The base team member may "lap" the other team members,
|
||||||
|
// copy to their local value before proceeding.
|
||||||
|
for ( int i = 1 ; i < m_team_size ; ++i ) {
|
||||||
|
*((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) = *team_value ;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fence to make sure all team members have access
|
||||||
|
memory_fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
team_fan_out();
|
||||||
|
|
||||||
|
return *((type volatile const *)local_value);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
|
||||||
|
* with intra-team non-deterministic ordering accumulation.
|
||||||
|
*
|
||||||
|
* The global inter-team accumulation value will, at the end of the
|
||||||
|
* league's parallel execution, be the scan's total.
|
||||||
|
* Parallel execution ordering of the league's teams is non-deterministic.
|
||||||
|
* As such the base value for each team's scan operation is similarly
|
||||||
|
* non-deterministic.
|
||||||
|
*/
|
||||||
|
template< typename ArgType >
|
||||||
|
KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return ArgType(); }
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
// Make sure there is enough scratch space:
|
||||||
|
typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ;
|
||||||
|
|
||||||
|
volatile type * const work_value = ((type*) m_exec.scratch_thread());
|
||||||
|
|
||||||
|
*work_value = value ;
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
|
||||||
|
if ( team_fan_in() ) {
|
||||||
|
// The last thread to synchronize returns true, all other threads wait for team_fan_out()
|
||||||
|
// m_team_base[0] == highest ranking team member
|
||||||
|
// m_team_base[ m_team_size - 1 ] == lowest ranking team member
|
||||||
|
//
|
||||||
|
// 1) copy from lower to higher rank, initialize lowest rank to zero
|
||||||
|
// 2) prefix sum from lowest to highest rank, skipping lowest rank
|
||||||
|
|
||||||
|
type accum = 0 ;
|
||||||
|
|
||||||
|
if ( global_accum ) {
|
||||||
|
for ( int i = m_team_size ; i-- ; ) {
|
||||||
|
type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
|
||||||
|
accum += val ;
|
||||||
|
}
|
||||||
|
accum = atomic_fetch_add( global_accum , accum );
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int i = m_team_size ; i-- ; ) {
|
||||||
|
type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread());
|
||||||
|
const type offset = accum ;
|
||||||
|
accum += val ;
|
||||||
|
val = offset ;
|
||||||
|
}
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
team_fan_out();
|
||||||
|
|
||||||
|
return *work_value ;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
|
||||||
|
*
|
||||||
|
* The highest rank thread can compute the reduction total as
|
||||||
|
* reduction_total = dev.team_scan( value ) + value ;
|
||||||
|
*/
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const
|
||||||
|
{ return this-> template team_scan<Type>( value , 0 ); }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
// Private for the driver
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef execution_space::scratch_memory_space space ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
template< class Arg0 , class Arg1 >
|
||||||
|
inline
|
||||||
|
OpenMPexecTeamMember( Impl::OpenMPexec & exec
|
||||||
|
, const TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP > & team
|
||||||
|
, const int shmem_size
|
||||||
|
)
|
||||||
|
: m_exec( exec )
|
||||||
|
, m_team_shared(0,0)
|
||||||
|
, m_team_shmem( shmem_size )
|
||||||
|
, m_team_base_rev(0)
|
||||||
|
, m_team_rank_rev(0)
|
||||||
|
, m_team_rank(0)
|
||||||
|
, m_team_size( team.team_size() )
|
||||||
|
, m_league_rank(0)
|
||||||
|
, m_league_end(0)
|
||||||
|
, m_league_size( team.league_size() )
|
||||||
|
{
|
||||||
|
const int pool_rank_rev = m_exec.pool_rank_rev();
|
||||||
|
const int pool_team_rank_rev = pool_rank_rev % team.team_alloc();
|
||||||
|
const int pool_league_rank_rev = pool_rank_rev / team.team_alloc();
|
||||||
|
const int league_iter_end = team.league_size() - pool_league_rank_rev * team.team_iter();
|
||||||
|
|
||||||
|
if ( pool_team_rank_rev < m_team_size && 0 < league_iter_end ) {
|
||||||
|
m_team_base_rev = team.team_alloc() * pool_league_rank_rev ;
|
||||||
|
m_team_rank_rev = pool_team_rank_rev ;
|
||||||
|
m_team_rank = m_team_size - ( m_team_rank_rev + 1 );
|
||||||
|
m_league_end = league_iter_end ;
|
||||||
|
m_league_rank = league_iter_end > team.team_iter() ? league_iter_end - team.team_iter() : 0 ;
|
||||||
|
new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_shmem );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool valid() const
|
||||||
|
{ return m_league_rank < m_league_end ; }
|
||||||
|
|
||||||
|
void next()
|
||||||
|
{
|
||||||
|
if ( ++m_league_rank < m_league_end ) {
|
||||||
|
team_barrier();
|
||||||
|
new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_shmem );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
template< class Arg0 , class Arg1 >
|
||||||
|
class TeamPolicy< Arg0 , Arg1 , Kokkos::OpenMP >
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos execution policy
|
||||||
|
typedef TeamPolicy execution_policy ;
|
||||||
|
|
||||||
|
//! Execution space of this execution policy.
|
||||||
|
typedef Kokkos::OpenMP execution_space ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::if_c< ! Impl::is_same< Kokkos::OpenMP , Arg0 >::value , Arg0 , Arg1 >::type
|
||||||
|
work_tag ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
inline static
|
||||||
|
int team_size_max( const FunctorType & )
|
||||||
|
{ return execution_space::thread_pool_size(1); }
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
inline static
|
||||||
|
int team_size_recommended( const FunctorType & )
|
||||||
|
{ return execution_space::thread_pool_size(2); }
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
inline static
|
||||||
|
int team_size_recommended( const FunctorType &, const int& )
|
||||||
|
{ return execution_space::thread_pool_size(2); }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
int m_league_size ;
|
||||||
|
int m_team_size ;
|
||||||
|
int m_team_alloc ;
|
||||||
|
int m_team_iter ;
|
||||||
|
|
||||||
|
inline void init( const int league_size_request
|
||||||
|
, const int team_size_request )
|
||||||
|
{
|
||||||
|
const int pool_size = execution_space::thread_pool_size(0);
|
||||||
|
const int team_max = execution_space::thread_pool_size(1);
|
||||||
|
const int team_grain = execution_space::thread_pool_size(2);
|
||||||
|
|
||||||
|
m_league_size = league_size_request ;
|
||||||
|
|
||||||
|
m_team_size = team_size_request < team_max ?
|
||||||
|
team_size_request : team_max ;
|
||||||
|
|
||||||
|
// Round team size up to a multiple of 'team_gain'
|
||||||
|
const int team_size_grain = team_grain * ( ( m_team_size + team_grain - 1 ) / team_grain );
|
||||||
|
const int team_count = pool_size / team_size_grain ;
|
||||||
|
|
||||||
|
// Constraint : pool_size = m_team_alloc * team_count
|
||||||
|
m_team_alloc = pool_size / team_count ;
|
||||||
|
|
||||||
|
// Maxumum number of iterations each team will take:
|
||||||
|
m_team_iter = ( m_league_size + team_count - 1 ) / team_count ;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
inline int team_size() const { return m_team_size ; }
|
||||||
|
inline int league_size() const { return m_league_size ; }
|
||||||
|
|
||||||
|
/** \brief Specify league size, request team size */
|
||||||
|
TeamPolicy( execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1)
|
||||||
|
{ init( league_size_request , team_size_request ); (void) vector_length_request; }
|
||||||
|
|
||||||
|
TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 )
|
||||||
|
{ init( league_size_request , team_size_request ); (void) vector_length_request; }
|
||||||
|
|
||||||
|
inline int team_alloc() const { return m_team_alloc ; }
|
||||||
|
inline int team_iter() const { return m_team_iter ; }
|
||||||
|
|
||||||
|
typedef Impl::OpenMPexecTeamMember member_type ;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
inline
|
||||||
|
int OpenMP::thread_pool_size( int depth )
|
||||||
|
{
|
||||||
|
return Impl::OpenMPexec::pool_size(depth);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pool rank of the calling OpenMP thread, looked up through
// Impl::OpenMPexec::m_map_rank; -1 when not compiling for the host
// execution space.
KOKKOS_INLINE_FUNCTION
int OpenMP::thread_pool_rank()
{
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  return -1 ;
#else
  return Impl::OpenMPexec::m_map_rank[ omp_get_thread_num() ];
#endif
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
|
||||||
|
TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
|
||||||
|
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>
|
||||||
|
TeamThreadRange(const Impl::OpenMPexecTeamMember& thread, const iType& begin, const iType& end) {
|
||||||
|
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>(thread,begin,end);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >
|
||||||
|
ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) {
|
||||||
|
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >(thread,count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrap a team member in the tag type consumed by the per-team single().
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>
  PerTeam(const Impl::OpenMPexecTeamMember& thread)
{
  typedef Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember> tag_type ;
  return tag_type( thread );
}
|
||||||
|
|
||||||
|
// Wrap a team member in the tag type consumed by the per-thread single().
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>
  PerThread(const Impl::OpenMPexecTeamMember& thread)
{
  typedef Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> tag_type ;
  return tag_type( thread );
}
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all threads of the the calling thread team.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template<typename iType, class Lambda>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, const Lambda& lambda) {
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||||
|
lambda(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of
|
||||||
|
* val is performed and put into result. This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries,
|
||||||
|
const Lambda & lambda, ValueType& result) {
|
||||||
|
|
||||||
|
result = ValueType();
|
||||||
|
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
result+=tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of
|
||||||
|
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
|
||||||
|
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
|
||||||
|
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
|
||||||
|
* '1 for *'). This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries,
|
||||||
|
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||||
|
|
||||||
|
ValueType result = init_result;
|
||||||
|
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
join(result,tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
init_result = loop_boundaries.thread.team_reduce(result,join);
|
||||||
|
}
|
||||||
|
|
||||||
|
} //namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template<typename iType, class Lambda>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||||
|
loop_boundaries, const Lambda& lambda) {
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||||
|
lambda(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of
|
||||||
|
* val is performed and put into result. This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||||
|
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||||
|
result = ValueType();
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
result+=tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of
|
||||||
|
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
|
||||||
|
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
|
||||||
|
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
|
||||||
|
* '1 for *'). This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||||
|
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||||
|
|
||||||
|
ValueType result = init_result;
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
join(result,tmp);
|
||||||
|
}
|
||||||
|
init_result = result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
|
||||||
|
* for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
|
||||||
|
* Depending on the target execution space the operator might be called twice: once with final=false
|
||||||
|
* and once with final=true. When final==true val contains the prefix sum value. The contribution of this
|
||||||
|
* "i" needs to be added to val no matter whether final==true or not. In a serial execution
|
||||||
|
* (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set
|
||||||
|
* to the final sum value over all vector lanes.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class FunctorType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >&
|
||||||
|
loop_boundaries, const FunctorType & lambda) {
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
|
||||||
|
typedef typename ValueTraits::value_type value_type ;
|
||||||
|
|
||||||
|
value_type scan_val = value_type();
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
lambda(i,scan_val,true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template<class FunctorType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) {
|
||||||
|
lambda();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) {
|
||||||
|
if(single_struct.team_member.team_rank()==0) lambda();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType, class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
|
||||||
|
lambda(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType, class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
|
||||||
|
if(single_struct.team_member.team_rank()==0) {
|
||||||
|
lambda(val);
|
||||||
|
}
|
||||||
|
single_struct.team_member.team_broadcast(val,0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */
|
||||||
|
|
||||||
484
lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp
Executable file
484
lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp
Executable file
@ -0,0 +1,484 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Kokkos_Core_fwd.hpp>
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_QTHREAD )
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <utility>
|
||||||
|
#include <Kokkos_Qthread.hpp>
|
||||||
|
#include <Kokkos_Atomic.hpp>
|
||||||
|
#include <impl/Kokkos_Error.hpp>
|
||||||
|
|
||||||
|
// Defines to enable experimental Qthread functionality
|
||||||
|
|
||||||
|
#define QTHREAD_LOCAL_PRIORITY
|
||||||
|
#define CLONED_TASKS
|
||||||
|
|
||||||
|
#include <qthread/qthread.h>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
enum { MAXIMUM_QTHREAD_WORKERS = 1024 };
|
||||||
|
|
||||||
|
/** s_exec is indexed by the reverse rank of the workers
|
||||||
|
* for faster fan-in / fan-out lookups
|
||||||
|
* [ n - 1 , n - 2 , ... , 0 ]
|
||||||
|
*/
|
||||||
|
QthreadExec * s_exec[ MAXIMUM_QTHREAD_WORKERS ];
|
||||||
|
|
||||||
|
int s_number_shepherds = 0 ;
|
||||||
|
int s_number_workers_per_shepherd = 0 ;
|
||||||
|
int s_number_workers = 0 ;
|
||||||
|
|
||||||
|
inline
|
||||||
|
QthreadExec ** worker_exec()
|
||||||
|
{
|
||||||
|
return s_exec + s_number_workers - ( qthread_shep() * s_number_workers_per_shepherd + qthread_worker_local(NULL) + 1 );
|
||||||
|
}
|
||||||
|
|
||||||
|
const int s_base_size = QthreadExec::align_alloc( sizeof(QthreadExec) );
|
||||||
|
|
||||||
|
int s_worker_reduce_end = 0 ; /* End of worker reduction memory */
|
||||||
|
int s_worker_shared_end = 0 ; /* Total of worker scratch memory */
|
||||||
|
int s_worker_shared_begin = 0 ; /* Beginning of worker shared memory */
|
||||||
|
|
||||||
|
QthreadExecFunctionPointer volatile s_active_function = 0 ;
|
||||||
|
const void * volatile s_active_function_arg = 0 ;
|
||||||
|
|
||||||
|
} /* namespace */
|
||||||
|
} /* namespace Impl */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
void Qthread::initialize( int thread_count )
|
||||||
|
{
|
||||||
|
// Environment variable: QTHREAD_NUM_SHEPHERDS
|
||||||
|
// Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP
|
||||||
|
// Environment variable: QTHREAD_HWPAR
|
||||||
|
|
||||||
|
{
|
||||||
|
char buffer[256];
|
||||||
|
snprintf(buffer,sizeof(buffer),"QTHREAD_HWPAR=%d",thread_count);
|
||||||
|
putenv(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) &&
|
||||||
|
( thread_count == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) &&
|
||||||
|
( thread_count == qthread_num_workers() );
|
||||||
|
|
||||||
|
bool ok_symmetry = true ;
|
||||||
|
|
||||||
|
if ( ok_init ) {
|
||||||
|
Impl::s_number_shepherds = qthread_num_shepherds();
|
||||||
|
Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD);
|
||||||
|
Impl::s_number_workers = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ;
|
||||||
|
|
||||||
|
for ( int i = 0 ; ok_symmetry && i < Impl::s_number_shepherds ; ++i ) {
|
||||||
|
ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! ok_init || ! ok_symmetry ) {
|
||||||
|
std::ostringstream msg ;
|
||||||
|
|
||||||
|
msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ;
|
||||||
|
msg << " : qthread_num_shepherds = " << qthread_num_shepherds();
|
||||||
|
msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD);
|
||||||
|
msg << " : qthread_num_workers = " << qthread_num_workers();
|
||||||
|
|
||||||
|
if ( ! ok_symmetry ) {
|
||||||
|
msg << " : qthread_num_workers_local = {" ;
|
||||||
|
for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) {
|
||||||
|
msg << " " << qthread_num_workers_local(i) ;
|
||||||
|
}
|
||||||
|
msg << " }" ;
|
||||||
|
}
|
||||||
|
|
||||||
|
Impl::s_number_workers = 0 ;
|
||||||
|
Impl::s_number_shepherds = 0 ;
|
||||||
|
Impl::s_number_workers_per_shepherd = 0 ;
|
||||||
|
|
||||||
|
if ( ok_init ) { qthread_finalize(); }
|
||||||
|
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||||
|
}
|
||||||
|
|
||||||
|
Impl::QthreadExec::resize_worker_scratch( 256 , 256 );
|
||||||
|
|
||||||
|
// Init the array for used for arbitrarily sized atomics
|
||||||
|
Impl::init_lock_array_host_space();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void Qthread::finalize()
|
||||||
|
{
|
||||||
|
Impl::QthreadExec::clear_workers();
|
||||||
|
|
||||||
|
if ( Impl::s_number_workers ) {
|
||||||
|
qthread_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
Impl::s_number_workers = 0 ;
|
||||||
|
Impl::s_number_shepherds = 0 ;
|
||||||
|
Impl::s_number_workers_per_shepherd = 0 ;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Qthread::print_configuration( std::ostream & s , const bool detail )
|
||||||
|
{
|
||||||
|
s << "Kokkos::Qthread {"
|
||||||
|
<< " num_shepherds(" << Impl::s_number_shepherds << ")"
|
||||||
|
<< " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd << ")"
|
||||||
|
<< " }" << std::endl ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the single function-local static Qthread object.
 *  The integer argument is ignored.
 */
Qthread & Qthread::instance( int )
{
  static Qthread singleton ;

  return singleton ;
}
|
||||||
|
|
||||||
|
/** No-op: the body performs no work. */
void Qthread::fence()
{}
|
||||||
|
|
||||||
|
/** Number of shepherds recorded by initialize(); zero when none are registered. */
int Qthread::shepherd_size() const
{
  return Impl::s_number_shepherds ;
}
|
||||||
|
/** Number of workers per shepherd recorded by initialize(); zero when none are registered. */
int Qthread::shepherd_worker_size() const
{
  return Impl::s_number_workers_per_shepherd ;
}
|
||||||
|
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
/** Task body forked by exec_all: invoke the active function on the calling
 *  worker's own QthreadExec record.
 *
 *  \param arg  Unused; the function and its argument are read from
 *              s_active_function / s_active_function_arg.
 *  \return     Always 0.
 */
aligned_t driver_exec_all( void * arg )
{
  QthreadExec * const self = *worker_exec();

  (*s_active_function)( *self , s_active_function_arg );

/*
  fprintf( stdout
         , "QthreadExec driver worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n"
         , self->worker_rank()
         , self->worker_size()
         , self->shepherd_rank()
         , self->shepherd_size()
         , self->shepherd_worker_rank()
         , self->shepherd_worker_size()
         );
  fflush(stdout);
*/

  return 0 ;
}
|
||||||
|
|
||||||
|
/** Task body forked by resize_worker_scratch: allocate and construct the
 *  calling worker's QthreadExec record (record plus scratch space).
 *
 *  \param arg  Unused.
 *  \return     Always 0.
 *
 *  A failure (slot already occupied, or malloc returned null) is reported on
 *  stderr.  After a failed malloc the slot is left null, which the caller's
 *  "verify all workers allocated" pass turns into an exception.
 */
aligned_t driver_resize_worker_scratch( void * arg )
{
  static volatile int lock_begin = 0 ;
  static volatile int lock_end   = 0 ;

  QthreadExec ** const exec = worker_exec();

  //----------------------------------------
  // Serialize allocation for thread safety

  while ( ! atomic_compare_exchange_strong( & lock_begin , 0 , 1 ) ); // Spin wait to claim lock

  const bool ok = 0 == *exec ;

  if ( ok ) { *exec = (QthreadExec *) malloc( s_base_size + s_worker_shared_end ); }

  lock_begin = 0 ; // release lock

  // Only construct into memory that was actually obtained: placement-new on
  // a null pointer (malloc failure) would be undefined behavior.
  const bool allocated = ok && ( 0 != *exec );

  // Construction happens outside the lock, on the worker that owns the record.
  if ( allocated ) { new( *exec ) QthreadExec(); }

  //----------------------------------------
  // Wait for all calls to complete to ensure that each worker has executed.
  // The last arrival resets the counter, which releases everyone spinning below.

  if ( s_number_workers == 1 + atomic_fetch_add( & lock_end , 1 ) ) { lock_end = 0 ; }

  while ( lock_end );

/*
  fprintf( stdout
         , "QthreadExec resize worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n"
         , (**exec).worker_rank()
         , (**exec).worker_size()
         , (**exec).shepherd_rank()
         , (**exec).shepherd_size()
         , (**exec).shepherd_worker_rank()
         , (**exec).shepherd_worker_size()
         );
  fflush(stdout);
*/

  //----------------------------------------

  if ( ! allocated ) {
    fprintf( stderr , "Kokkos::QthreadExec resize failed\n" );
    fflush( stderr );
  }

  return 0 ;
}
|
||||||
|
|
||||||
|
void verify_is_process( const char * const label , bool not_active = false )
|
||||||
|
{
|
||||||
|
const bool not_process = 0 != qthread_shep() || 0 != qthread_worker_local(NULL);
|
||||||
|
const bool is_active = not_active && ( s_active_function || s_active_function_arg );
|
||||||
|
|
||||||
|
if ( not_process || is_active ) {
|
||||||
|
std::string msg( label );
|
||||||
|
msg.append( " : FAILED" );
|
||||||
|
if ( not_process ) msg.append(" : not called by main process");
|
||||||
|
if ( is_active ) msg.append(" : parallel execution in progress");
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int QthreadExec::worker_per_shepherd()
|
||||||
|
{
|
||||||
|
return s_number_workers_per_shepherd ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build the record for the *calling* worker.
 *
 *  Ranks are queried from the Qthread runtime, sizes are copied from the
 *  file-level layout, and the scratch area is taken to start s_base_size
 *  bytes past `this` (the record is constructed in place at the front of a
 *  larger allocation).
 */
QthreadExec::QthreadExec()
{
  // Who am I?
  const int shep        = qthread_shep();
  const int local       = qthread_worker_local(NULL);
  const int global_rank = shep * s_number_workers_per_shepherd + local ;

  // Reverse index of this worker's shepherd; the worker table is stored in reverse rank order.
  const int rev_shep = s_number_shepherds - ( shep + 1 );

  // Ranks
  m_shepherd_rank        = shep ;
  m_shepherd_worker_rank = local ;
  m_worker_rank          = global_rank ;

  // Sizes
  m_shepherd_size        = s_number_shepherds ;
  m_shepherd_worker_size = s_number_workers_per_shepherd ;
  m_worker_size          = s_number_workers ;

  // Table views: all workers, and the slice holding this shepherd's workers.
  m_worker_base   = s_exec ;
  m_shepherd_base = s_exec + s_number_workers_per_shepherd * rev_shep ;

  // Scratch memory
  m_scratch_alloc = ( (unsigned char *) this ) + s_base_size ;
  m_reduce_end    = s_worker_reduce_end ;

  m_worker_state  = QthreadExec::Active ;
}
|
||||||
|
|
||||||
|
void QthreadExec::clear_workers()
|
||||||
|
{
|
||||||
|
for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) {
|
||||||
|
QthreadExec * const exec = s_exec[iwork] ;
|
||||||
|
s_exec[iwork] = 0 ;
|
||||||
|
free( exec );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Re-construct `space` in place so that it spans the shared-memory part
 *  [ s_worker_shared_begin , s_worker_shared_end ) of the scratch area
 *  owned by the first entry of this worker's shepherd slice.
 */
void QthreadExec::shared_reset( Qthread::scratch_memory_space & space )
{
  unsigned char * const shepherd_scratch =
    (unsigned char *) (**m_shepherd_base).m_scratch_alloc ;

  new( & space )
    Qthread::scratch_memory_space( shepherd_scratch + s_worker_shared_begin
                                 , s_worker_shared_end - s_worker_shared_begin );
}
|
||||||
|
|
||||||
|
void QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size )
|
||||||
|
{
|
||||||
|
const int exec_all_reduce_alloc = align_alloc( reduce_size );
|
||||||
|
const int shepherd_scan_alloc = align_alloc( 8 );
|
||||||
|
const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size );
|
||||||
|
|
||||||
|
if ( s_worker_reduce_end < exec_all_reduce_alloc ||
|
||||||
|
s_worker_shared_end < shepherd_shared_end ) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
fprintf( stdout , "QthreadExec::resize\n");
|
||||||
|
fflush(stdout);
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Clear current worker memory before allocating new worker memory
|
||||||
|
clear_workers();
|
||||||
|
|
||||||
|
// Increase the buffers to an aligned allocation
|
||||||
|
s_worker_reduce_end = exec_all_reduce_alloc ;
|
||||||
|
s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ;
|
||||||
|
s_worker_shared_end = shepherd_shared_end ;
|
||||||
|
|
||||||
|
// Need to query which shepherd this main 'process' is running...
|
||||||
|
|
||||||
|
const int main_shep = qthread_shep();
|
||||||
|
|
||||||
|
// Have each worker resize its memory for proper first-touch
|
||||||
|
#if 0
|
||||||
|
for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
|
||||||
|
for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) {
|
||||||
|
qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep );
|
||||||
|
}}
|
||||||
|
#else
|
||||||
|
// If this function is used before the 'qthread.task_policy' unit test
|
||||||
|
// the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so.
|
||||||
|
for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {
|
||||||
|
const int num_clone = jshep != main_shep ? s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ;
|
||||||
|
|
||||||
|
if ( num_clone ) {
|
||||||
|
const int ret = qthread_fork_clones_to_local_priority
|
||||||
|
( driver_resize_worker_scratch /* function */
|
||||||
|
, NULL /* function data block */
|
||||||
|
, NULL /* pointer to return value feb */
|
||||||
|
, jshep /* shepherd number */
|
||||||
|
, num_clone - 1 /* number of instances - 1 */
|
||||||
|
);
|
||||||
|
|
||||||
|
assert(ret == QTHREAD_SUCCESS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
driver_resize_worker_scratch( NULL );
|
||||||
|
|
||||||
|
// Verify all workers allocated
|
||||||
|
|
||||||
|
bool ok = true ;
|
||||||
|
for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; }
|
||||||
|
|
||||||
|
if ( ! ok ) {
|
||||||
|
std::ostringstream msg ;
|
||||||
|
msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ;
|
||||||
|
for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) {
|
||||||
|
if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); }
|
||||||
|
}
|
||||||
|
msg << " }" ;
|
||||||
|
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Run `func( exec , arg )` once on every worker.
 *
 *  Must be called from worker 0 of shepherd 0 while no other dispatch is
 *  active (checked by verify_is_process).  The function and argument are
 *  published through s_active_function / s_active_function_arg, tasks are
 *  forked for all other workers, and the caller executes its own share
 *  directly before the published values are cleared again.
 */
void QthreadExec::exec_all( Qthread & , QthreadExecFunctionPointer func , const void * arg )
{
  verify_is_process("QthreadExec::exec_all(...)",true);

/*
  fprintf( stdout , "QthreadExec::exec_all\n");
  fflush(stdout);
*/

  // Publish the work item for driver_exec_all.
  s_active_function     = func ;
  s_active_function_arg = arg ;

  // Need to query which shepherd this main 'process' is running...

  const int main_shep = qthread_shep();

#if 0
  for ( int jshep = 0 , iwork = 0 ; jshep < s_number_shepherds ; ++jshep ) {
  for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i , ++iwork ) {
    qthread_fork_to( driver_exec_all , NULL , NULL , jshep );
  }}
#else
  // If this function is used before the 'qthread.task_policy' unit test
  // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so.
  for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) {

    // The main shepherd needs one task fewer: the caller runs its own below.
    const int num_clone = s_number_workers_per_shepherd - ( jshep == main_shep ? 1 : 0 );

    if ( 0 == num_clone ) { continue ; }

    const int ret = qthread_fork_clones_to_local_priority
      ( driver_exec_all   /* function */
      , NULL              /* function data block */
      , NULL              /* pointer to return value feb */
      , jshep             /* shepherd number */
      , num_clone - 1     /* number of instances - 1 */
      );

    assert(ret == QTHREAD_SUCCESS);
  }
#endif

  driver_exec_all( NULL );

  // Mark the dispatch as finished.
  s_active_function     = 0 ;
  s_active_function_arg = 0 ;
}
|
||||||
|
|
||||||
|
void * QthreadExec::exec_all_reduce_result()
|
||||||
|
{
|
||||||
|
return s_exec[0]->m_scratch_alloc ;
|
||||||
|
}
|
||||||
|
|
||||||
|
} /* namespace Impl */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
/** Default member: a team of one in a league of one, bound to the calling
 *  worker's execution record, with its shared-memory view reset through
 *  QthreadExec::shared_reset.
 */
QthreadTeamPolicyMember::QthreadTeamPolicyMember()
  : m_exec( **worker_exec() ),
    m_team_shared( 0 , 0 ),
    m_team_size( 1 ),    // s_number_workers_per_shepherd )
    m_team_rank( 0 ),    // m_exec.shepherd_worker_rank() )
    m_league_size( 1 ),
    m_league_end( 1 ),
    m_league_rank( 0 )
{
  m_exec.shared_reset( m_team_shared );
}
|
||||||
|
|
||||||
|
} /* namespace Impl */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_QTHREAD ) */
|
||||||
|
|
||||||
614
lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp
Executable file
614
lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp
Executable file
@ -0,0 +1,614 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_QTHREADEXEC_HPP
|
||||||
|
#define KOKKOS_QTHREADEXEC_HPP
|
||||||
|
|
||||||
|
#include <impl/Kokkos_spinwait.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class QthreadExec ;
|
||||||
|
|
||||||
|
typedef void (*QthreadExecFunctionPointer)( QthreadExec & , const void * );
|
||||||
|
|
||||||
|
class QthreadExec {
|
||||||
|
private:
|
||||||
|
|
||||||
|
enum { Inactive = 0 , Active = 1 };
|
||||||
|
|
||||||
|
const QthreadExec * const * m_worker_base ;
|
||||||
|
const QthreadExec * const * m_shepherd_base ;
|
||||||
|
|
||||||
|
void * m_scratch_alloc ; ///< Scratch memory [ reduce , team , shared ]
|
||||||
|
int m_reduce_end ; ///< End of scratch reduction memory
|
||||||
|
|
||||||
|
int m_shepherd_rank ;
|
||||||
|
int m_shepherd_size ;
|
||||||
|
|
||||||
|
int m_shepherd_worker_rank ;
|
||||||
|
int m_shepherd_worker_size ;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + m_shepherd_worker_rank
|
||||||
|
* m_worker_size = m_shepherd_size * m_shepherd_worker_size
|
||||||
|
*/
|
||||||
|
int m_worker_rank ;
|
||||||
|
int m_worker_size ;
|
||||||
|
|
||||||
|
int mutable volatile m_worker_state ;
|
||||||
|
|
||||||
|
|
||||||
|
friend class Kokkos::Qthread ;
|
||||||
|
|
||||||
|
~QthreadExec();
|
||||||
|
QthreadExec( const QthreadExec & );
|
||||||
|
QthreadExec & operator = ( const QthreadExec & );
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
QthreadExec();
|
||||||
|
|
||||||
|
/** Execute the input function on all available Qthread workers */
|
||||||
|
static void exec_all( Qthread & , QthreadExecFunctionPointer , const void * );
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
/** Barrier across all workers participating in the 'exec_all' */
|
||||||
|
void exec_all_barrier() const
|
||||||
|
{
|
||||||
|
const int rev_rank = m_worker_size - ( m_worker_rank + 1 );
|
||||||
|
|
||||||
|
int n , j ;
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
|
||||||
|
Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rev_rank ) {
|
||||||
|
m_worker_state = QthreadExec::Inactive ;
|
||||||
|
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
|
||||||
|
m_worker_base[j]->m_worker_state = QthreadExec::Active ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Barrier across workers within the shepherd with rank < team_rank */
|
||||||
|
void shepherd_barrier( const int team_size ) const
|
||||||
|
{
|
||||||
|
if ( m_shepherd_worker_rank < team_size ) {
|
||||||
|
|
||||||
|
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
|
||||||
|
|
||||||
|
int n , j ;
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rev_rank ) {
|
||||||
|
m_worker_state = QthreadExec::Inactive ;
|
||||||
|
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
/** Reduce across all workers participating in the 'exec_all' */
|
||||||
|
template< class FunctorType , class ArgTag >
|
||||||
|
inline
|
||||||
|
void exec_all_reduce( const FunctorType & func ) const
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
|
||||||
|
|
||||||
|
const int rev_rank = m_worker_size - ( m_worker_rank + 1 );
|
||||||
|
|
||||||
|
int n , j ;
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
|
||||||
|
const QthreadExec & fan = *m_worker_base[j];
|
||||||
|
|
||||||
|
Impl::spinwait( fan.m_worker_state , QthreadExec::Active );
|
||||||
|
|
||||||
|
ValueJoin::join( func , m_scratch_alloc , fan.m_scratch_alloc );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rev_rank ) {
|
||||||
|
m_worker_state = QthreadExec::Inactive ;
|
||||||
|
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
|
||||||
|
m_worker_base[j]->m_worker_state = QthreadExec::Active ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
/** Scall across all workers participating in the 'exec_all' */
|
||||||
|
template< class FunctorType , class ArgTag >
|
||||||
|
inline
|
||||||
|
void exec_all_scan( const FunctorType & func ) const
|
||||||
|
{
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > ValueInit ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueOps< FunctorType , ArgTag > ValueOps ;
|
||||||
|
|
||||||
|
const int rev_rank = m_worker_size - ( m_worker_rank + 1 );
|
||||||
|
|
||||||
|
int n , j ;
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
|
||||||
|
Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rev_rank ) {
|
||||||
|
m_worker_state = QthreadExec::Inactive ;
|
||||||
|
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Root thread scans across values before releasing threads
|
||||||
|
// Worker data is in reverse order, so m_worker_base[0] is the
|
||||||
|
// highest ranking thread.
|
||||||
|
|
||||||
|
// Copy from lower ranking to higher ranking worker.
|
||||||
|
for ( int i = 1 ; i < m_worker_size ; ++i ) {
|
||||||
|
ValueOps::copy( func
|
||||||
|
, m_worker_base[i-1]->m_scratch_alloc
|
||||||
|
, m_worker_base[i]->m_scratch_alloc
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
ValueInit::init( func , m_worker_base[m_worker_size-1]->m_scratch_alloc );
|
||||||
|
|
||||||
|
// Join from lower ranking to higher ranking worker.
|
||||||
|
// Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2].
|
||||||
|
for ( int i = m_worker_size - 1 ; --i ; ) {
|
||||||
|
ValueJoin::join( func , m_worker_base[i-1]->m_scratch_alloc , m_worker_base[i]->m_scratch_alloc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) {
|
||||||
|
m_worker_base[j]->m_worker_state = QthreadExec::Active ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
template< class Type>
|
||||||
|
inline
|
||||||
|
volatile Type * shepherd_team_scratch_value() const
|
||||||
|
{ return (volatile Type*)(((unsigned char *) m_scratch_alloc) + m_reduce_end); }
|
||||||
|
|
||||||
|
template< class Type >
|
||||||
|
inline
|
||||||
|
void shepherd_broadcast( Type & value , const int team_size , const int team_rank ) const
|
||||||
|
{
|
||||||
|
if ( m_shepherd_base ) {
|
||||||
|
Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
|
||||||
|
if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value ; }
|
||||||
|
memory_fence();
|
||||||
|
shepherd_barrier( team_size );
|
||||||
|
value = *shared_value ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class Type >
|
||||||
|
inline
|
||||||
|
Type shepherd_reduce( const int team_size , const Type & value ) const
|
||||||
|
{
|
||||||
|
*shepherd_team_scratch_value<Type>() = value ;
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
|
||||||
|
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
|
||||||
|
|
||||||
|
int n , j ;
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rev_rank ) {
|
||||||
|
m_worker_state = QthreadExec::Inactive ;
|
||||||
|
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
|
||||||
|
for ( int i = 1 ; i < n ; ++i ) {
|
||||||
|
accum += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
|
||||||
|
}
|
||||||
|
for ( int i = 1 ; i < n ; ++i ) {
|
||||||
|
* m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ;
|
||||||
|
}
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return *shepherd_team_scratch_value<Type>();
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class JoinOp >
|
||||||
|
inline
|
||||||
|
typename JoinOp::value_type
|
||||||
|
shepherd_reduce( const int team_size
|
||||||
|
, const typename JoinOp::value_type & value
|
||||||
|
, const JoinOp & op ) const
|
||||||
|
{
|
||||||
|
typedef typename JoinOp::value_type Type ;
|
||||||
|
|
||||||
|
*shepherd_team_scratch_value<Type>() = value ;
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
|
||||||
|
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
|
||||||
|
|
||||||
|
int n , j ;
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rev_rank ) {
|
||||||
|
m_worker_state = QthreadExec::Inactive ;
|
||||||
|
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
volatile Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
|
||||||
|
for ( int i = 1 ; i < team_size ; ++i ) {
|
||||||
|
op.join( accum , * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() );
|
||||||
|
}
|
||||||
|
for ( int i = 1 ; i < team_size ; ++i ) {
|
||||||
|
* m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ;
|
||||||
|
}
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return *shepherd_team_scratch_value<Type>();
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class Type >
|
||||||
|
inline
|
||||||
|
Type shepherd_scan( const int team_size
|
||||||
|
, const Type & value
|
||||||
|
, Type * const global_value = 0 ) const
|
||||||
|
{
|
||||||
|
*shepherd_team_scratch_value<Type>() = value ;
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
|
||||||
|
const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 );
|
||||||
|
|
||||||
|
int n , j ;
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rev_rank ) {
|
||||||
|
m_worker_state = QthreadExec::Inactive ;
|
||||||
|
Impl::spinwait( m_worker_state , QthreadExec::Inactive );
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Root thread scans across values before releasing threads
|
||||||
|
// Worker data is in reverse order, so m_shepherd_base[0] is the
|
||||||
|
// highest ranking thread.
|
||||||
|
|
||||||
|
// Copy from lower ranking to higher ranking worker.
|
||||||
|
|
||||||
|
Type accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>();
|
||||||
|
for ( int i = 1 ; i < team_size ; ++i ) {
|
||||||
|
const Type tmp = * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
|
||||||
|
accum += tmp ;
|
||||||
|
* m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() = tmp ;
|
||||||
|
}
|
||||||
|
|
||||||
|
* m_shepherd_base[team_size-1]->shepherd_team_scratch_value<Type>() =
|
||||||
|
global_value ? atomic_fetch_add( global_value , accum ) : 0 ;
|
||||||
|
|
||||||
|
// Join from lower ranking to higher ranking worker.
|
||||||
|
for ( int i = team_size ; --i ; ) {
|
||||||
|
* m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>();
|
||||||
|
}
|
||||||
|
|
||||||
|
memory_fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) {
|
||||||
|
m_shepherd_base[j]->m_worker_state = QthreadExec::Active ;
|
||||||
|
}
|
||||||
|
|
||||||
|
return *shepherd_team_scratch_value<Type>();
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
/** Round `size` up to the next multiple of the 64-byte allocation grain. */
static inline
int align_alloc( int size )
{
  // The grain must stay a power of two for the mask arithmetic below.
  const int grain      = 1 << 6 ;
  const int grain_mask = grain - 1 ;

  const int padded = size + grain_mask ;

  return padded & ~grain_mask ;
}
|
||||||
|
|
||||||
|
void shared_reset( Qthread::scratch_memory_space & );
|
||||||
|
|
||||||
|
/** Start of this worker's scratch memory, where its reduction value lives. */
void * exec_all_reduce_value() const
{
  return m_scratch_alloc ;
}
|
||||||
|
|
||||||
|
static void * exec_all_reduce_result();
|
||||||
|
|
||||||
|
static void resize_worker_scratch( const int reduce_size , const int shared_size );
|
||||||
|
static void clear_workers();
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
inline int worker_rank() const { return m_worker_rank ; }
|
||||||
|
inline int worker_size() const { return m_worker_size ; }
|
||||||
|
inline int shepherd_worker_rank() const { return m_shepherd_worker_rank ; }
|
||||||
|
inline int shepherd_worker_size() const { return m_shepherd_worker_size ; }
|
||||||
|
inline int shepherd_rank() const { return m_shepherd_rank ; }
|
||||||
|
inline int shepherd_size() const { return m_shepherd_size ; }
|
||||||
|
|
||||||
|
static int worker_per_shepherd();
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace Impl */
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
class QthreadTeamPolicyMember {
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::Qthread execution_space ;
|
||||||
|
typedef execution_space::scratch_memory_space scratch_memory_space ;
|
||||||
|
|
||||||
|
|
||||||
|
Impl::QthreadExec & m_exec ;
|
||||||
|
scratch_memory_space m_team_shared ;
|
||||||
|
const int m_team_size ;
|
||||||
|
const int m_team_rank ;
|
||||||
|
const int m_league_size ;
|
||||||
|
const int m_league_end ;
|
||||||
|
int m_league_rank ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
const scratch_memory_space & team_shmem() const { return m_team_shared ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; }
|
||||||
|
KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION void team_barrier() const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{}
|
||||||
|
#else
|
||||||
|
{ m_exec.shepherd_barrier( m_team_size ); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value , int rank ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return Type(); }
|
||||||
|
#else
|
||||||
|
{ return m_exec.template shepherd_broadcast<Type>( value , m_team_size , rank ); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return Type(); }
|
||||||
|
#else
|
||||||
|
{ return m_exec.template shepherd_reduce<Type>( m_team_size , value ); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template< typename JoinOp >
|
||||||
|
KOKKOS_INLINE_FUNCTION typename JoinOp::value_type
|
||||||
|
team_reduce( const typename JoinOp::value_type & value
|
||||||
|
, const JoinOp & op ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return typename JoinOp::value_type(); }
|
||||||
|
#else
|
||||||
|
{ return m_exec.template shepherd_reduce<JoinOp>( m_team_size , value , op ); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering.
|
||||||
|
*
|
||||||
|
* The highest rank thread can compute the reduction total as
|
||||||
|
* reduction_total = dev.team_scan( value ) + value ;
|
||||||
|
*/
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return Type(); }
|
||||||
|
#else
|
||||||
|
{ return m_exec.template shepherd_scan<Type>( m_team_size , value ); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/** \brief Intra-team exclusive prefix sum with team_rank() ordering
|
||||||
|
* with intra-team non-deterministic ordering accumulation.
|
||||||
|
*
|
||||||
|
* The global inter-team accumulation value will, at the end of the
|
||||||
|
* league's parallel execution, be the scan's total.
|
||||||
|
* Parallel execution ordering of the league's teams is non-deterministic.
|
||||||
|
* As such the base value for each team's scan operation is similarly
|
||||||
|
* non-deterministic.
|
||||||
|
*/
|
||||||
|
template< typename Type >
|
||||||
|
KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const
|
||||||
|
#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||||
|
{ return Type(); }
|
||||||
|
#else
|
||||||
|
{ return m_exec.template shepherd_scan<Type>( m_team_size , value , global_accum ); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
// Private driver for task-team parallel
|
||||||
|
|
||||||
|
QthreadTeamPolicyMember();
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
// Private for the driver ( for ( member_type i(exec,team); i ; i.next_team() ) { ... }
|
||||||
|
|
||||||
|
// Initialize
|
||||||
|
template< class Arg0 , class Arg1 >
|
||||||
|
QthreadTeamPolicyMember( Impl::QthreadExec & exec , const TeamPolicy<Arg0,Arg1,Qthread> & team )
|
||||||
|
: m_exec( exec )
|
||||||
|
, m_team_shared(0,0)
|
||||||
|
, m_team_size( team.m_team_size )
|
||||||
|
, m_team_rank( exec.shepherd_worker_rank() )
|
||||||
|
, m_league_size( team.m_league_size )
|
||||||
|
, m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) )
|
||||||
|
, m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 )
|
||||||
|
{
|
||||||
|
m_exec.shared_reset( m_team_shared );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue
|
||||||
|
operator bool () const { return m_league_rank < m_league_end ; }
|
||||||
|
|
||||||
|
// iterate
|
||||||
|
void next_team() { ++m_league_rank ; m_exec.shared_reset( m_team_shared ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
|
||||||
|
template< class Arg0 , class Arg1 >
|
||||||
|
class TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
const int m_league_size ;
|
||||||
|
const int m_team_size ;
|
||||||
|
const int m_shepherd_iter ;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
//! Tag this class as a kokkos execution policy
|
||||||
|
typedef TeamPolicy execution_policy ;
|
||||||
|
typedef Qthread execution_space ;
|
||||||
|
|
||||||
|
typedef typename
|
||||||
|
Impl::if_c< ! Impl::is_same< Kokkos::Qthread , Arg0 >::value , Arg0 , Arg1 >::type
|
||||||
|
work_tag ;
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
inline static
|
||||||
|
int team_size_max( const FunctorType & )
|
||||||
|
{ return Qthread::instance().shepherd_worker_size(); }
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
static int team_size_recommended( const FunctorType & f )
|
||||||
|
{ return team_size_max( f ); }
|
||||||
|
|
||||||
|
template< class FunctorType >
|
||||||
|
inline static
|
||||||
|
int team_size_recommended( const FunctorType & f , const int& )
|
||||||
|
{ return team_size_max( f ); }
|
||||||
|
|
||||||
|
//----------------------------------------
|
||||||
|
|
||||||
|
inline int team_size() const { return m_team_size ; }
|
||||||
|
inline int league_size() const { return m_league_size ; }
|
||||||
|
|
||||||
|
// One active team per shepherd
|
||||||
|
TeamPolicy( Kokkos::Qthread & q
|
||||||
|
, const int league_size
|
||||||
|
, const int team_size
|
||||||
|
)
|
||||||
|
: m_league_size( league_size )
|
||||||
|
, m_team_size( team_size < q.shepherd_worker_size()
|
||||||
|
? team_size : q.shepherd_worker_size() )
|
||||||
|
, m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() )
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// One active team per shepherd
|
||||||
|
TeamPolicy( const int league_size
|
||||||
|
, const int team_size
|
||||||
|
)
|
||||||
|
: m_league_size( league_size )
|
||||||
|
, m_team_size( team_size < Qthread::instance().shepherd_worker_size()
|
||||||
|
? team_size : Qthread::instance().shepherd_worker_size() )
|
||||||
|
, m_shepherd_iter( ( league_size + Qthread::instance().shepherd_size() - 1 ) / Qthread::instance().shepherd_size() )
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef Impl::QthreadTeamPolicyMember member_type ;
|
||||||
|
|
||||||
|
friend class Impl::QthreadTeamPolicyMember ;
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace Kokkos */
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#endif /* #define KOKKOS_QTHREADEXEC_HPP */
|
||||||
|
|
||||||
643
lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
Executable file
643
lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp
Executable file
@ -0,0 +1,643 @@
|
|||||||
|
/*
|
||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 2.0
|
||||||
|
// Copyright (2014) Sandia Corporation
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. Neither the name of the Corporation nor the names of the
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||||
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||||
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||||
|
//
|
||||||
|
// ************************************************************************
|
||||||
|
//@HEADER
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef KOKKOS_QTHREAD_PARALLEL_HPP
|
||||||
|
#define KOKKOS_QTHREAD_PARALLEL_HPP
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <Kokkos_Parallel.hpp>
|
||||||
|
|
||||||
|
#include <impl/Kokkos_StaticAssert.hpp>
|
||||||
|
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||||
|
|
||||||
|
#include <Qthread/Kokkos_QthreadExec.hpp>
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
namespace Impl {
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelFor< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
|
||||||
|
|
||||||
|
const FunctorType m_func ;
|
||||||
|
const Policy m_policy ;
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if<
|
||||||
|
( Impl::is_same< typename PType::work_tag , void >::value )
|
||||||
|
, const FunctorType & >::type functor
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = range.end();
|
||||||
|
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
|
||||||
|
functor( i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if<
|
||||||
|
( ! Impl::is_same< typename PType::work_tag , void >::value )
|
||||||
|
, const FunctorType & >::type functor
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = range.end();
|
||||||
|
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
|
||||||
|
functor( typename PType::work_tag() , i );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Function is called once by every concurrent thread.
|
||||||
|
static void execute( QthreadExec & exec , const void * arg )
|
||||||
|
{
|
||||||
|
const ParallelFor & self = * ((const ParallelFor *) arg );
|
||||||
|
|
||||||
|
driver( self.m_func , typename Policy::WorkRange( self.m_policy , exec.worker_rank() , exec.worker_size() ) );
|
||||||
|
|
||||||
|
// All threads wait for completion.
|
||||||
|
exec.exec_all_barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
ParallelFor( const FunctorType & functor
|
||||||
|
, const Policy & policy
|
||||||
|
)
|
||||||
|
: m_func( functor )
|
||||||
|
, m_policy( policy )
|
||||||
|
{
|
||||||
|
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::execute , this );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelReduce< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
const FunctorType m_func ;
|
||||||
|
const Policy m_policy ;
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if<
|
||||||
|
( Impl::is_same< typename PType::work_tag , void >::value )
|
||||||
|
, const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = range.end();
|
||||||
|
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
|
||||||
|
functor( i , update );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if<
|
||||||
|
( ! Impl::is_same< typename PType::work_tag , void >::value )
|
||||||
|
, const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = range.end();
|
||||||
|
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
|
||||||
|
functor( typename PType::work_tag() , i , update );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void execute( QthreadExec & exec , const void * arg )
|
||||||
|
{
|
||||||
|
const ParallelReduce & self = * ((const ParallelReduce *) arg );
|
||||||
|
|
||||||
|
driver( self.m_func
|
||||||
|
, ValueInit::init( self.m_func , exec.exec_all_reduce_value() )
|
||||||
|
, typename Policy::WorkRange( self.m_policy , exec.worker_rank() , exec.worker_size() )
|
||||||
|
);
|
||||||
|
|
||||||
|
exec.template exec_all_reduce<FunctorType, typename Policy::work_tag >( self.m_func );
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
template< class HostViewType >
|
||||||
|
ParallelReduce( const FunctorType & functor
|
||||||
|
, const Policy & policy
|
||||||
|
, const HostViewType & result_view )
|
||||||
|
: m_func( functor )
|
||||||
|
, m_policy( policy )
|
||||||
|
{
|
||||||
|
QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_func ) , 0 );
|
||||||
|
|
||||||
|
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::execute , this );
|
||||||
|
|
||||||
|
const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result();
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_func , data );
|
||||||
|
|
||||||
|
if ( result_view.ptr_on_device() ) {
|
||||||
|
const unsigned n = ValueTraits::value_count( m_func );
|
||||||
|
for ( unsigned i = 0 ; i < n ; ++i ) { result_view.ptr_on_device()[i] = data[i]; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
class ParallelFor< FunctorType , TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > Policy ;
|
||||||
|
|
||||||
|
const FunctorType m_func ;
|
||||||
|
const Policy m_team ;
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
|
||||||
|
const typename Policy::member_type & >::type member ) const
|
||||||
|
{ m_func( member ); }
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
|
||||||
|
const typename Policy::member_type & >::type member ) const
|
||||||
|
{ m_func( TagType() , member ); }
|
||||||
|
|
||||||
|
static void execute( QthreadExec & exec , const void * arg )
|
||||||
|
{
|
||||||
|
const ParallelFor & self = * ((const ParallelFor *) arg );
|
||||||
|
|
||||||
|
typename Policy::member_type member( exec , self.m_team );
|
||||||
|
|
||||||
|
while ( member ) {
|
||||||
|
self.ParallelFor::template driver< typename Policy::work_tag >( member );
|
||||||
|
member.team_barrier();
|
||||||
|
member.next_team();
|
||||||
|
}
|
||||||
|
|
||||||
|
exec.exec_all_barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
ParallelFor( const FunctorType & functor ,
|
||||||
|
const Policy & policy )
|
||||||
|
: m_func( functor )
|
||||||
|
, m_team( policy )
|
||||||
|
{
|
||||||
|
QthreadExec::resize_worker_scratch
|
||||||
|
( /* reduction memory */ 0
|
||||||
|
, /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) );
|
||||||
|
|
||||||
|
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::execute , this );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 >
|
||||||
|
class ParallelReduce< FunctorType , TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef TeamPolicy< Arg0 , Arg1 , Kokkos::Qthread > Policy ;
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
const FunctorType m_func ;
|
||||||
|
const Policy m_team ;
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
void driver( typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
|
||||||
|
const typename Policy::member_type & >::type member
|
||||||
|
, reference_type update ) const
|
||||||
|
{ m_func( member , update ); }
|
||||||
|
|
||||||
|
template< class TagType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
|
void driver( typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
|
||||||
|
const typename Policy::member_type & >::type member
|
||||||
|
, reference_type update ) const
|
||||||
|
{ m_func( TagType() , member , update ); }
|
||||||
|
|
||||||
|
static void execute( QthreadExec & exec , const void * arg )
|
||||||
|
{
|
||||||
|
const ParallelReduce & self = * ((const ParallelReduce *) arg );
|
||||||
|
|
||||||
|
// Initialize thread-local value
|
||||||
|
reference_type update = ValueInit::init( self.m_func , exec.exec_all_reduce_value() );
|
||||||
|
|
||||||
|
typename Policy::member_type member( exec , self.m_team );
|
||||||
|
|
||||||
|
while ( member ) {
|
||||||
|
self.ParallelReduce::template driver< typename Policy::work_tag >( member , update );
|
||||||
|
member.team_barrier();
|
||||||
|
member.next_team();
|
||||||
|
}
|
||||||
|
|
||||||
|
exec.template exec_all_reduce< FunctorType , typename Policy::work_tag >( self.m_func );
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
template< class ViewType >
|
||||||
|
ParallelReduce( const FunctorType & functor ,
|
||||||
|
const Policy & policy ,
|
||||||
|
const ViewType & result )
|
||||||
|
: m_func( functor )
|
||||||
|
, m_team( policy )
|
||||||
|
{
|
||||||
|
QthreadExec::resize_worker_scratch
|
||||||
|
( /* reduction memory */ ValueTraits::value_size( functor )
|
||||||
|
, /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() ) );
|
||||||
|
|
||||||
|
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::execute , this );
|
||||||
|
|
||||||
|
const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result();
|
||||||
|
|
||||||
|
Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( m_func , data );
|
||||||
|
|
||||||
|
const unsigned n = ValueTraits::value_count( m_func );
|
||||||
|
for ( unsigned i = 0 ; i < n ; ++i ) { result.ptr_on_device()[i] = data[i]; }
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template< class FunctorType , class Arg0 , class Arg1 , class Arg2 >
|
||||||
|
class ParallelScan< FunctorType , Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > >
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
typedef Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Qthread > Policy ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
|
||||||
|
typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
|
||||||
|
|
||||||
|
typedef typename ValueTraits::pointer_type pointer_type ;
|
||||||
|
typedef typename ValueTraits::reference_type reference_type ;
|
||||||
|
|
||||||
|
const FunctorType m_func ;
|
||||||
|
const Policy m_policy ;
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if<
|
||||||
|
( Impl::is_same< typename PType::work_tag , void >::value )
|
||||||
|
, const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const bool final
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = range.end();
|
||||||
|
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
|
||||||
|
functor( i , update , final );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template< class PType >
|
||||||
|
KOKKOS_FORCEINLINE_FUNCTION static
|
||||||
|
void driver( typename Impl::enable_if<
|
||||||
|
( ! Impl::is_same< typename PType::work_tag , void >::value )
|
||||||
|
, const FunctorType & >::type functor
|
||||||
|
, reference_type update
|
||||||
|
, const bool final
|
||||||
|
, const PType & range )
|
||||||
|
{
|
||||||
|
const typename PType::member_type e = range.end();
|
||||||
|
for ( typename PType::member_type i = range.begin() ; i < e ; ++i ) {
|
||||||
|
functor( typename PType::work_tag() , i , update , final );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void execute( QthreadExec & exec , const void * arg )
|
||||||
|
{
|
||||||
|
const ParallelScan & self = * ((const ParallelScan *) arg );
|
||||||
|
|
||||||
|
const typename Policy::WorkRange range( self.m_policy , exec.worker_rank() , exec.worker_size() );
|
||||||
|
|
||||||
|
// Initialize thread-local value
|
||||||
|
reference_type update = ValueInit::init( self.m_func , exec.exec_all_reduce_value() );
|
||||||
|
|
||||||
|
driver( self.m_func , update , false , range );
|
||||||
|
|
||||||
|
exec.template exec_all_scan< FunctorType , typename Policy::work_tag >( self.m_func );
|
||||||
|
|
||||||
|
driver( self.m_func , update , true , range );
|
||||||
|
|
||||||
|
exec.exec_all_barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
ParallelScan( const FunctorType & functor
|
||||||
|
, const Policy & policy
|
||||||
|
)
|
||||||
|
: m_func( functor )
|
||||||
|
, m_policy( policy )
|
||||||
|
{
|
||||||
|
QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_func ) , 0 );
|
||||||
|
|
||||||
|
Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelScan::execute , this );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Impl
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>
|
||||||
|
TeamThreadRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count)
|
||||||
|
{
|
||||||
|
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>
|
||||||
|
TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread
|
||||||
|
, const iType & begin
|
||||||
|
, const iType & end
|
||||||
|
)
|
||||||
|
{
|
||||||
|
return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>(thread,begin,end);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename iType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >
|
||||||
|
ThreadVectorRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count) {
|
||||||
|
return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >(thread,count);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** \brief  Wrap the team member in the ThreadSingleStruct tag type. */
KOKKOS_INLINE_FUNCTION
Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>
PerTeam( const Impl::QthreadTeamPolicyMember & thread )
{
  typedef Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember> single_type ;

  return single_type( thread );
}
|
||||||
|
|
||||||
|
/** \brief  Wrap the team member in the VectorSingleStruct tag type. */
KOKKOS_INLINE_FUNCTION
Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>
PerThread( const Impl::QthreadTeamPolicyMember & thread )
{
  typedef Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember> single_type ;

  return single_type( thread );
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all threads of the the calling thread team.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template<typename iType, class Lambda>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, const Lambda& lambda) {
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||||
|
lambda(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of
|
||||||
|
* val is performed and put into result. This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries,
|
||||||
|
const Lambda & lambda, ValueType& result) {
|
||||||
|
|
||||||
|
result = ValueType();
|
||||||
|
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
result+=tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined( KOKKOS_HAVE_CXX11 )
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of
|
||||||
|
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
|
||||||
|
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
|
||||||
|
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
|
||||||
|
* '1 for *'). This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries,
|
||||||
|
const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||||
|
|
||||||
|
ValueType result = init_result;
|
||||||
|
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
join(result,tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* #if defined( KOKKOS_HAVE_CXX11 ) */
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template<typename iType, class Lambda>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||||
|
loop_boundaries, const Lambda& lambda) {
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||||
|
lambda(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of
|
||||||
|
* val is performed and put into result. This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||||
|
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||||
|
result = ValueType();
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
result+=tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of
|
||||||
|
* val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
|
||||||
|
* The input value of init_result is used as initializer for temporary variables of ValueType. Therefore
|
||||||
|
* the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
|
||||||
|
* '1 for *'). This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class Lambda, typename ValueType, class JoinType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||||
|
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||||
|
|
||||||
|
ValueType result = init_result;
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
ValueType tmp = ValueType();
|
||||||
|
lambda(i,tmp);
|
||||||
|
join(result,tmp);
|
||||||
|
}
|
||||||
|
init_result = result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
|
||||||
|
* for each i=0..N-1.
|
||||||
|
*
|
||||||
|
* The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
|
||||||
|
* Depending on the target execution space the operator might be called twice: once with final=false
|
||||||
|
* and once with final=true. When final==true val contains the prefix sum value. The contribution of this
|
||||||
|
* "i" needs to be added to val no matter whether final==true or not. In a serial execution
|
||||||
|
* (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set
|
||||||
|
* to the final sum value over all vector lanes.
|
||||||
|
* This functionality requires C++11 support.*/
|
||||||
|
template< typename iType, class FunctorType >
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >&
|
||||||
|
loop_boundaries, const FunctorType & lambda) {
|
||||||
|
|
||||||
|
typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
|
||||||
|
typedef typename ValueTraits::value_type value_type ;
|
||||||
|
|
||||||
|
value_type scan_val = value_type();
|
||||||
|
|
||||||
|
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||||
|
#pragma ivdep
|
||||||
|
#endif
|
||||||
|
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||||
|
lambda(i,scan_val,true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
namespace Kokkos {
|
||||||
|
|
||||||
|
template<class FunctorType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) {
|
||||||
|
lambda();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) {
|
||||||
|
if(single_struct.team_member.team_rank()==0) lambda();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType, class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) {
|
||||||
|
lambda(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class FunctorType, class ValueType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) {
|
||||||
|
if(single_struct.team_member.team_rank()==0) {
|
||||||
|
lambda(val);
|
||||||
|
}
|
||||||
|
single_struct.team_member.team_broadcast(val,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* #define KOKKOS_QTHREAD_PARALLEL_HPP */
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user