Merge pull request #4322 from stanmoore1/kk_update_4.4.0

Update Kokkos library in LAMMPS to v4.4.1
This commit is contained in:
Axel Kohlmeyer
2024-09-26 16:29:31 -04:00
committed by GitHub
289 changed files with 14825 additions and 10040 deletions

View File

@ -8,8 +8,24 @@ endif()
########################################################################
# consistency checks and Kokkos options/settings required by LAMMPS
if(Kokkos_ENABLE_CUDA)
message(STATUS "KOKKOS: Enabling CUDA LAMBDA function support")
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE)
option(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC "CUDA asynchronous malloc support" OFF)
mark_as_advanced(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
if(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
message(STATUS "KOKKOS: CUDA malloc async support enabled")
else()
message(STATUS "KOKKOS: CUDA malloc async support disabled")
endif()
endif()
if(Kokkos_ENABLE_HIP)
option(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS "Enable multiple kernel instantiations with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS)
option(Kokkos_ENABLE_ROCTHRUST "Use RoCThrust library" ON)
mark_as_advanced(Kokkos_ENABLE_ROCTHRUST)
if(Kokkos_ARCH_AMD_GFX942 OR Kokkos_ARCH_AMD_GFX940)
option(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY "Enable unified memory with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
endif()
endif()
# Adding OpenMP compiler flags without the checks done for
# BUILD_OMP can result in compile failures. Enforce consistency.
@ -18,6 +34,15 @@ if(Kokkos_ENABLE_OPENMP)
message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP")
endif()
endif()
if(Kokkos_ENABLE_SERIAL)
if(NOT (Kokkos_ENABLE_OPENMP OR Kokkos_ENABLE_THREADS OR
Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL
OR Kokkos_ENABLE_OPENMPTARGET))
option(Kokkos_ENABLE_ATOMICS_BYPASS "Disable atomics for Kokkos Serial Backend" ON)
mark_as_advanced(Kokkos_ENABLE_ATOMICS_BYPASS)
endif()
endif()
########################################################################
option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF)
@ -45,8 +70,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "243de871b3dc2cf3990c1c404032df83" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "de6ee80d00b6212b02bfb7f1e71a8392" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -71,7 +96,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.3.01 REQUIRED CONFIG)
find_package(Kokkos 4.4.01 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -1,12 +1,103 @@
# CHANGELOG
## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.00...4.4.01)
### Features:
* Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229)
### Backend and Architecture Enhancements:
#### CUDA:
* Experimental support for unified memory mode (intended for Grace-Hopper etc.) [\#6823](https://github.com/kokkos/kokkos/pull/6823)
### Bug Fixes
* OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284)
* Fix implicit copy assignment operators in few AVX2 masks being deleted [#7296](https://github.com/kokkos/kokkos/pull/7296)
## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00)
### Features:
* Add `Kokkos::View` conversions from and to [`std::mdspan`](https://en.cppreference.com/w/cpp/container/mdspan) [\#6830](https://github.com/kokkos/kokkos/pull/6830) [\#7069](https://github.com/kokkos/kokkos/pull/7069)
### Backend and Architecture Enhancements:
#### CUDA:
* `nvcc_wrapper`: Adding ability to process `--disable-warnings` flag [\#6936](https://github.com/kokkos/kokkos/issues/6936)
* Use recommended/max team size functions in Cuda ParallelFor and Reduce constructors [\#6891](https://github.com/kokkos/kokkos/issues/6891)
* Improve compile-times when building with `Kokkos_ENABLE_DEBUG_BOUNDS_CHECK` in Cuda [\#7013](https://github.com/kokkos/kokkos/pull/7013)
#### HIP:
* Use HIP builtin atomics [\#6882](https://github.com/kokkos/kokkos/pull/6882) [\#7000](https://github.com/kokkos/kokkos/pull/7000)
* Enable user-specified compiler and linker flags for AMD GPUs [\#7127](https://github.com/kokkos/kokkos/pull/7127)
#### SYCL:
* Add support for Graphs [\#6912](https://github.com/kokkos/kokkos/pull/6912)
* Fix multi-GPU support [\#6887](https://github.com/kokkos/kokkos/pull/6887)
* Improve performance of reduction and scan operations [\#6562](https://github.com/kokkos/kokkos/pull/6562), [\#6750](https://github.com/kokkos/kokkos/pull/6750)
* Fix lock for guarding scratch space in `TeamPolicy` `parallel_reduce` [\#6988](https://github.com/kokkos/kokkos/pull/6988)
* Include submission command queue property information into `SYCL::print_configuration()` [\#7004](https://github.com/kokkos/kokkos/pull/7004)
#### OpenACC:
* Make `TeamPolicy` `parallel_for` execute on the correct async queue [\#7012](https://github.com/kokkos/kokkos/pull/7012)
#### OpenMPTarget:
* Honor user requested loop ordering in `MDRange` policy [\#6925](https://github.com/kokkos/kokkos/pull/6925)
* Prevent data races by guarding the scratch space used in `parallel_scan` [\#6998](https://github.com/kokkos/kokkos/pull/6998)
#### HPX:
* Workaround issue with template argument deduction to support compilation with NVCC [\#7015](https://github.com/kokkos/kokkos/pull/7015)
### General Enhancements
* Improve performance of view copies in host parallel regions [\#6730](https://github.com/kokkos/kokkos/pull/6730)
* Harmonize convertibility rules of `Kokkos::RandomAccessIterator` with `View`s [\#6929](https://github.com/kokkos/kokkos/pull/6929)
* Add a precondition check for non-overlapping ranges for the `adjacent_difference` algorithm in debug mode [\#6922](https://github.com/kokkos/kokkos/pull/6922)
* Add deduction guides for `TeamPolicy` [\#7030](https://github.com/kokkos/kokkos/pull/7030)
* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802)
* Updates for `Kokkos::Array`: add `kokkos_swap(Array<T, N>)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148)
* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040)
### Build System Changes
* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965)
* Update Intel GPU architectures in Makefile [\#6895](https://github.com/kokkos/kokkos/pull/6895)
* Fix use of OpenMP with Cuda or HIP as compile language [\#6972](https://github.com/kokkos/kokkos/pull/6972)
* Define and enforce new minimum compiler versions for C++20 support [\#7128](https://github.com/kokkos/kokkos/pull/7128), [\#7123](https://github.com/kokkos/kokkos/pull/7123)
* Add nvidia Grace CPU architecture: `Kokkos_ARCH_ARMV9_GRACE` [\#7158](https://github.com/kokkos/kokkos/pull/7158)
* Fix Makefile.kokkos for Threads [\#6896](https://github.com/kokkos/kokkos/pull/6896)
* Remove support for NVHPC as CUDA device compiler [\#6987](https://github.com/kokkos/kokkos/pull/6987)
* Fix using CUDAToolkit for CMake 3.28.4 and higher [\#7062](https://github.com/kokkos/kokkos/pull/7062)
### Incompatibilities (i.e. breaking changes)
* Drop `Kokkos::Array` special treatment in `View`s [\#6906](https://github.com/kokkos/kokkos/pull/6906)
* Drop `Experimental::RawMemoryAllocationFailure` [\#7145](https://github.com/kokkos/kokkos/pull/7145)
### Deprecations
* Remove `Experimental::LayoutTiled` class template and deprecate `is_layouttiled` trait [\#6907](https://github.com/kokkos/kokkos/pull/6907)
* Deprecate `Kokkos::layout_iterate_type_selector` [\#7076](https://github.com/kokkos/kokkos/pull/7076)
* Deprecate specialization of `Kokkos::pair` for a single element [\#6947](https://github.com/kokkos/kokkos/pull/6947)
* Deprecate `deep_copy` of `UnorderedMap` of different size [\#6812](https://github.com/kokkos/kokkos/pull/6812)
* Deprecate trailing `Proxy` template argument of `Kokkos::Array` [\#6934](https://github.com/kokkos/kokkos/pull/6934)
* Deprecate implicit conversions of integers to `ChunkSize` [\#7151](https://github.com/kokkos/kokkos/pull/7151)
* Deprecate implicit conversions to execution spaces [\#7156](https://github.com/kokkos/kokkos/pull/7156)
### Bug Fixes
* Do not return a copy of the input functor in `Experimental::for_each` [\#6910](https://github.com/kokkos/kokkos/pull/6910)
* Fix `realloc` on views of non-default constructible element types [\#6993](https://github.com/kokkos/kokkos/pull/6993)
* Fix undefined behavior in `View` initialization or fill with zeros [\#7014](https://github.com/kokkos/kokkos/pull/7014)
* Fix `sort_by_key` on host execution spaces when building with NVCC [\#7059](https://github.com/kokkos/kokkos/pull/7059)
* Fix using shared libraries and -fvisibility=hidden [\#7065](https://github.com/kokkos/kokkos/pull/7065)
* Fix view reference counting when functor copy constructor throws in parallel dispatch [\#6289](https://github.com/kokkos/kokkos/pull/6289)
* Fix `initialize(InitializationSetting)` for handling `print_configuration` setting [\#7098](https://github.com/kokkos/kokkos/pull/7098)
* Thread safety fixes for the Serial and OpenMP backend [\#7080](https://github.com/kokkos/kokkos/pull/7080), [\#6151](https://github.com/kokkos/kokkos/pull/6151)
## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01)
### Backend and Architecture Enhancements:
#### HIP:
* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877)
* MI300 support unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877)
### Bug Fixes
* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951)

65
lib/kokkos/CITATION.cff Normal file
View File

@ -0,0 +1,65 @@
cff-version: 1.2.0
title: Kokkos
message: >-
If you use this software, please cite the overview paper
type: software
authors:
- name: The Kokkos authors
website: https://kokkos.org/community/team/
identifiers:
- type: url
value: https://kokkos.org/kokkos-core-wiki/citation.html
repository-code: 'https://github.com/kokkos/kokkos'
url: 'https://kokkos.org/'
license: Apache-2.0
preferred-citation:
type: article
authors:
- given-names: Christian R.
family-names: Trott
- given-names: Damien
family-names: Lebrun-Grandié
- given-names: Daniel
family-names: Arndt
- family-names: Ciesko
given-names: Jan
- given-names: Vinh
family-names: Dang
- family-names: Ellingwood
given-names: Nathan
- given-names: Rahulkumar
family-names: Gayatri
- given-names: Evan
family-names: Harvey
- given-names: Daisy S.
family-names: Hollman
- given-names: Dan
family-names: Ibanez
- given-names: Nevin
family-names: Liber
- given-names: Jonathan
family-names: Madsen
- given-names: Jeff
family-names: Miles
- given-names: David
family-names: Poliakoff
- given-names: Amy
family-names: Powell
- given-names: Sivasankaran
family-names: Rajamanickam
- given-names: Mikael
family-names: Simberg
- given-names: Dan
family-names: Sunderland
- given-names: Bruno
family-names: Turcksin
- given-names: Jeremiah
family-names: Wilke
doi: 10.1109/TPDS.2021.3097283
journal: IEEE Transactions on Parallel and Distributed Systems
start: 805
end: 817
title: "Kokkos 3: Programming Model Extensions for the Exascale Era"
volume: 33
issue: 4
year: 2022

View File

@ -150,7 +150,7 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 3)
set(Kokkos_VERSION_MINOR 4)
set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}")

View File

@ -11,7 +11,7 @@ CXXFLAGS += $(SHFLAGS)
endif
KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 3
KOKKOS_VERSION_MINOR = 4
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
@ -21,11 +21,11 @@ KOKKOS_DEVICES ?= "OpenMP"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -41,7 +41,7 @@ KOKKOS_STANDALONE_CMAKE ?= "no"
# Default settings specific options.
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
KOKKOS_CUDA_OPTIONS ?= "disable_malloc_async"
# Options: rdc
KOKKOS_HIP_OPTIONS ?= ""
@ -328,12 +328,43 @@ KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL)
KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX)
KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
# Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter
# matches the CMake option but we also accept the former for backward-compatibility.
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen)
endif
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP)
endif
# Traditionally the architecture was called PVC instead of Intel_PVC. This
# version makes us accept IntelPVC and Intel_PVC as well.
KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC)
# NVIDIA based.
@ -394,7 +425,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc))
# IBM based.
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
@ -433,7 +465,6 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
@ -758,6 +789,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")
KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
@ -1119,11 +1158,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
@ -1216,6 +1250,8 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0)
endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY")
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)

View File

@ -81,7 +81,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)

View File

@ -1,4 +1,4 @@
![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)
[![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)](https://kokkos.org)
# Kokkos: Core Libraries
@ -10,43 +10,66 @@ hierarchies and multiple types of execution resources. It currently can use
CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other
backends in development.
**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.**
**Kokkos Core is part of the [Kokkos C++ Performance Portability Programming Ecosystem](https://kokkos.org/about/abstract/).**
For the complete documentation, click below:
Kokkos is a [Linux Foundation](https://linuxfoundation.org) project.
# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki)
# Learning about Kokkos
## Learning about Kokkos
To start learning about Kokkos:
- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities.
- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities.
- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html).
- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html).
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
## Obtaining Kokkos
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
The current release is [4.3.01](https://github.com/kokkos/kokkos/releases/tag/4.3.01).
```bash
curl -OJ -L https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz
# Or with wget
wget https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz
```
To clone the latest development version of Kokkos from GitHub:
```bash
git clone -b develop https://github.com/kokkos/kokkos.git
```
### Building Kokkos
To build Kokkos, you will need to have a C++ compiler that supports C++17 or later.
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html).
Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html).
You can also install Kokkos using [Spack](https://spack.io/): `spack install kokkos`. [Available configuration options](https://packages.spack.io/package.html?name=kokkos) can be displayed using `spack info kokkos`.
## For the complete documentation: [kokkos.org/kokkos-core-wiki/](https://kokkos.org/kokkos-core-wiki/)
## Support
For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
For non-public questions send an email to: *crtrott(at)sandia.gov*
# Contributing to Kokkos
## Contributing
Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute.
Please see [this page](https://kokkos.org/kokkos-core-wiki/contributing.html) for details on how to contribute.
# Requirements, Building and Installing
## Citing Kokkos
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html).
Please see the [following page](https://kokkos.org/kokkos-core-wiki/citation.html).
Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html).
# Citing Kokkos
Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html).
# License
## License
[![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html)

View File

@ -189,6 +189,33 @@ void applyPermutation(const ExecutionSpace& space,
KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); });
}
// FIXME_NVCC: nvcc has trouble compiling lambdas inside a function with
// variadic templates (sort_by_key_via_sort). Switch to using functors instead.
template <typename Permute>
struct IotaFunctor {
Permute _permute;
KOKKOS_FUNCTION void operator()(int i) const { _permute(i) = i; }
};
template <typename Keys>
struct LessFunctor {
Keys _keys;
KOKKOS_FUNCTION bool operator()(int i, int j) const {
return _keys(i) < _keys(j);
}
};
// FIXME_NVCC+MSVC: We can't use a lambda instead of a functor which gave us
// "For this host platform/dialect, an extended lambda cannot be defined inside
// the 'if' or 'else' block of a constexpr if statement"
template <typename Keys, typename Comparator>
struct KeyComparisonFunctor {
Keys m_keys;
Comparator m_comparator;
KOKKOS_FUNCTION bool operator()(int i, int j) const {
return m_comparator(m_keys(i), m_keys(j));
}
};
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties,
class... MaybeComparator>
@ -207,10 +234,9 @@ void sort_by_key_via_sort(
n);
// iota
Kokkos::parallel_for(
"Kokkos::sort_by_key_via_sort::iota",
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
KOKKOS_LAMBDA(int i) { permute(i) = i; });
Kokkos::parallel_for("Kokkos::sort_by_key_via_sort::iota",
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
IotaFunctor<decltype(permute)>{permute});
using Layout =
typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout;
@ -228,16 +254,15 @@ void sort_by_key_via_sort(
Kokkos::DefaultHostExecutionSpace host_exec;
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
host_exec, host_permute,
KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); });
Kokkos::sort(host_exec, host_permute,
LessFunctor<decltype(host_keys)>{host_keys});
} else {
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(host_keys(i), host_keys(j));
});
host_exec, host_permute,
KeyComparisonFunctor<decltype(host_keys), decltype(keys_comparator)>{
host_keys, keys_comparator});
}
host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort");
Kokkos::deep_copy(exec, permute, host_permute);
@ -262,16 +287,14 @@ void sort_by_key_via_sort(
}
#else
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
exec, permute,
KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); });
Kokkos::sort(exec, permute, LessFunctor<decltype(keys)>{keys});
} else {
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(keys(i), keys(j));
});
exec, permute,
KeyComparisonFunctor<decltype(keys), decltype(keys_comparator)>{
keys, keys_comparator});
}
#endif
}

View File

@ -29,49 +29,46 @@ namespace Experimental {
template <
class ExecutionSpace, class IteratorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
IteratorType first, IteratorType last,
UnaryFunctorType functor) {
return Impl::for_each_exespace_impl(label, ex, first, last,
std::move(functor));
void for_each(const std::string& label, const ExecutionSpace& ex,
IteratorType first, IteratorType last, UnaryFunctorType functor) {
Impl::for_each_exespace_impl(label, ex, first, last, std::move(functor));
}
template <
class ExecutionSpace, class IteratorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first,
IteratorType last, UnaryFunctorType functor) {
return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default",
ex, first, last, std::move(functor));
void for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last,
UnaryFunctorType functor) {
Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", ex,
first, last, std::move(functor));
}
template <
class ExecutionSpace, class DataType, class... Properties,
class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) {
void for_each(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v),
std::move(functor));
Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v),
std::move(functor));
}
template <
class ExecutionSpace, class DataType, class... Properties,
class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) {
void for_each(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex,
KE::begin(v), KE::end(v),
std::move(functor));
Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex,
KE::begin(v), KE::end(v), std::move(functor));
}
//
@ -82,24 +79,23 @@ UnaryFunctorType for_each(const ExecutionSpace& ex,
template <class TeamHandleType, class IteratorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0>
KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle,
IteratorType first, IteratorType last,
UnaryFunctorType functor) {
return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor));
KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle,
IteratorType first, IteratorType last,
UnaryFunctorType functor) {
Impl::for_each_team_impl(teamHandle, first, last, std::move(functor));
}
template <class TeamHandleType, class DataType, class... Properties,
class UnaryFunctorType,
std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0>
KOKKOS_FUNCTION UnaryFunctorType
for_each(const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) {
KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v),
std::move(functor));
Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v),
std::move(functor));
}
} // namespace Experimental

View File

@ -82,6 +82,11 @@ OutputIteratorType adjacent_difference_exespace_impl(
return first_dest;
}
#ifdef KOKKOS_ENABLE_DEBUG
// check for overlapping iterators
Impl::expect_no_overlap(first_from, last_from, first_dest);
#endif
// run
const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from);
@ -114,6 +119,11 @@ KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl(
return first_dest;
}
#ifdef KOKKOS_ENABLE_DEBUG
// check for overlapping iterators
Impl::expect_no_overlap(first_from, last_from, first_dest);
#endif
// run
const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from);

View File

@ -24,18 +24,21 @@ namespace Kokkos {
namespace Experimental {
namespace Impl {
template <class T>
class RandomAccessIterator;
template <typename T, typename enable = void>
struct is_admissible_to_kokkos_std_algorithms : std::false_type {};
template <typename T>
struct is_admissible_to_kokkos_std_algorithms<
T, std::enable_if_t< ::Kokkos::is_view<T>::value && T::rank() == 1 &&
(std::is_same<typename T::traits::array_layout,
Kokkos::LayoutLeft>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutStride>::value)> >
T, std::enable_if_t<::Kokkos::is_view<T>::value && T::rank() == 1 &&
(std::is_same<typename T::traits::array_layout,
Kokkos::LayoutLeft>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutStride>::value)>>
: std::true_type {};
template <class ViewType>
@ -58,6 +61,18 @@ using is_iterator = Kokkos::is_detected<iterator_category_t, T>;
template <class T>
inline constexpr bool is_iterator_v = is_iterator<T>::value;
template <typename ViewType>
struct is_kokkos_iterator : std::false_type {};
template <typename ViewType>
struct is_kokkos_iterator<RandomAccessIterator<ViewType>> {
static constexpr bool value =
is_admissible_to_kokkos_std_algorithms<ViewType>::value;
};
template <class T>
inline constexpr bool is_kokkos_iterator_v = is_kokkos_iterator<T>::value;
//
// are_iterators
//
@ -215,6 +230,38 @@ KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first,
(void)last;
}
//
// Check if kokkos iterators are overlapping
//
template <typename IteratorType1, typename IteratorType2>
KOKKOS_INLINE_FUNCTION void expect_no_overlap(
[[maybe_unused]] IteratorType1 first, [[maybe_unused]] IteratorType1 last,
[[maybe_unused]] IteratorType2 s_first) {
if constexpr (is_kokkos_iterator_v<IteratorType1> &&
is_kokkos_iterator_v<IteratorType2>) {
auto const view1 = first.view();
auto const view2 = s_first.view();
std::size_t stride1 = view1.stride(0);
std::size_t stride2 = view2.stride(0);
ptrdiff_t first_diff = view1.data() - view2.data();
// FIXME If strides are not identical, checks may not be made
// with the cost of O(1)
// Currently, checks are made only if strides are identical
// If first_diff == 0, there is already an overlap
if (stride1 == stride2 || first_diff == 0) {
[[maybe_unused]] bool is_no_overlap = (first_diff % stride1);
auto* first_pointer1 = view1.data();
auto* first_pointer2 = view2.data();
[[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
[[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||
last_pointer1 <= first_pointer2 || is_no_overlap);
}
}
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos

View File

@ -150,8 +150,9 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
return d_first + count;
}
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}

View File

@ -42,10 +42,9 @@ struct StdForEachFunctor {
};
template <class HandleType, class IteratorType, class UnaryFunctorType>
UnaryFunctorType for_each_exespace_impl(const std::string& label,
const HandleType& handle,
IteratorType first, IteratorType last,
UnaryFunctorType functor) {
void for_each_exespace_impl(const std::string& label, const HandleType& handle,
IteratorType first, IteratorType last,
UnaryFunctorType functor) {
// checks
Impl::static_assert_random_access_and_accessible(handle, first);
Impl::expect_valid_range(first, last);
@ -56,8 +55,6 @@ UnaryFunctorType for_each_exespace_impl(const std::string& label,
label, RangePolicy<HandleType>(handle, 0, num_elements),
StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor));
handle.fence("Kokkos::for_each: fence after operation");
return functor;
}
template <class ExecutionSpace, class IteratorType, class SizeType,
@ -75,7 +72,7 @@ IteratorType for_each_n_exespace_impl(const std::string& label,
}
for_each_exespace_impl(label, ex, first, last, std::move(functor));
// no neeed to fence since for_each_exespace_impl fences already
// no need to fence since for_each_exespace_impl fences already
return last;
}
@ -84,9 +81,9 @@ IteratorType for_each_n_exespace_impl(const std::string& label,
// team impl
//
template <class TeamHandleType, class IteratorType, class UnaryFunctorType>
KOKKOS_FUNCTION UnaryFunctorType
for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first,
IteratorType last, UnaryFunctorType functor) {
KOKKOS_FUNCTION void for_each_team_impl(const TeamHandleType& teamHandle,
IteratorType first, IteratorType last,
UnaryFunctorType functor) {
// checks
Impl::static_assert_random_access_and_accessible(teamHandle, first);
Impl::expect_valid_range(first, last);
@ -96,7 +93,6 @@ for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first,
TeamThreadRange(teamHandle, 0, num_elements),
StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor));
teamHandle.team_barrier();
return functor;
}
template <class TeamHandleType, class IteratorType, class SizeType,
@ -113,7 +109,7 @@ for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first,
}
for_each_team_impl(teamHandle, first, last, std::move(functor));
// no neeed to fence since for_each_team_impl fences already
// no need to fence since for_each_team_impl fences already
return last;
}

View File

@ -59,6 +59,30 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
ptrdiff_t current_index)
: m_view(view), m_current_index(current_index) {}
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
template <class OtherViewType>
requires(std::is_constructible_v<view_type, OtherViewType>) KOKKOS_FUNCTION
explicit(!std::is_convertible_v<OtherViewType, view_type>)
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
#else
template <
class OtherViewType,
std::enable_if_t<std::is_constructible_v<view_type, OtherViewType> &&
!std::is_convertible_v<OtherViewType, view_type>,
int> = 0>
KOKKOS_FUNCTION explicit RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
template <class OtherViewType,
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
int> = 0>
KOKKOS_FUNCTION RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
#endif
KOKKOS_FUNCTION
iterator_type& operator++() {
++m_current_index;
@ -152,9 +176,16 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
KOKKOS_FUNCTION
reference operator*() const { return m_view(m_current_index); }
KOKKOS_FUNCTION
view_type view() const { return m_view; }
private:
view_type m_view;
ptrdiff_t m_current_index = 0;
// Needed for the converting constructor accepting another iterator
template <class>
friend class RandomAccessIterator;
};
} // namespace Impl

View File

@ -175,8 +175,9 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
d_first + count);
}
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}

View File

@ -46,6 +46,44 @@ TEST_F(random_access_iterator_test, constructor) {
EXPECT_TRUE(true);
}
TEST_F(random_access_iterator_test, constructiblity) {
auto first_d = KE::begin(m_dynamic_view);
auto cfirst_d = KE::cbegin(m_dynamic_view);
static_assert(std::is_constructible_v<decltype(cfirst_d), decltype(first_d)>);
static_assert(
!std::is_constructible_v<decltype(first_d), decltype(cfirst_d)>);
[[maybe_unused]] decltype(cfirst_d) tmp_cfirst_d(first_d);
auto first_s = KE::begin(m_static_view);
auto cfirst_s = KE::cbegin(m_static_view);
static_assert(std::is_constructible_v<decltype(cfirst_s), decltype(first_s)>);
static_assert(
!std::is_constructible_v<decltype(first_s), decltype(cfirst_s)>);
[[maybe_unused]] decltype(cfirst_s) tmp_cfirst_s(first_s);
auto first_st = KE::begin(m_strided_view);
auto cfirst_st = KE::cbegin(m_strided_view);
static_assert(
std::is_constructible_v<decltype(cfirst_st), decltype(first_st)>);
static_assert(
!std::is_constructible_v<decltype(first_st), decltype(cfirst_st)>);
[[maybe_unused]] decltype(cfirst_st) tmp_cfirst_st(first_st);
// [FIXME] Better to have tests for the explicit specifier with an expression.
// As soon as View converting constructors are re-implemented with a
// conditional explicit, we may add those tests.
static_assert(std::is_constructible_v<decltype(first_s), decltype(first_d)>);
static_assert(std::is_constructible_v<decltype(first_st), decltype(first_d)>);
static_assert(std::is_constructible_v<decltype(first_d), decltype(first_s)>);
static_assert(std::is_constructible_v<decltype(first_st), decltype(first_s)>);
static_assert(std::is_constructible_v<decltype(first_d), decltype(first_st)>);
static_assert(std::is_constructible_v<decltype(first_s), decltype(first_st)>);
EXPECT_TRUE(true);
}
template <class IteratorType, class ValueType>
void test_random_access_it_verify(IteratorType it, ValueType gold_value) {
using view_t = Kokkos::View<typename IteratorType::value_type>;

View File

@ -69,7 +69,7 @@ void iota(ExecutionSpace const &space, ViewType const &v,
typename ViewType::value_type value = 0) {
using ValueType = typename ViewType::value_type;
Kokkos::parallel_for(
"ArborX::Algorithms::iota",
"Kokkos::Algorithms::iota",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)),
KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; });
}
@ -87,6 +87,18 @@ TEST(TEST_CATEGORY, SortByKeyEmptyView) {
Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
}
// Test #7036
TEST(TEST_CATEGORY, SortByKeyEmptyViewHost) {
using ExecutionSpace = Kokkos::DefaultHostExecutionSpace;
// does not matter if we use int or something else
Kokkos::View<int *, ExecutionSpace> keys("keys", 0);
Kokkos::View<float *, ExecutionSpace> values("values", 0);
ASSERT_NO_THROW(
Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
}
TEST(TEST_CATEGORY, SortByKey) {
using ExecutionSpace = TEST_EXECSPACE;
using MemorySpace = typename ExecutionSpace::memory_space;

View File

@ -81,5 +81,114 @@ TEST(std_algorithms, is_admissible_to_std_algorithms) {
strided_view_3d_t>::value);
}
TEST(std_algorithms, expect_no_overlap) {
namespace KE = Kokkos::Experimental;
using value_type = double;
static constexpr size_t extent0 = 13;
//-------------
// 1d views
//-------------
using static_view_1d_t = Kokkos::View<value_type[extent0]>;
[[maybe_unused]] static_view_1d_t static_view_1d{
"std-algo-test-1d-contiguous-view-static"};
using dyn_view_1d_t = Kokkos::View<value_type*>;
[[maybe_unused]] dyn_view_1d_t dynamic_view_1d{
"std-algo-test-1d-contiguous-view-dynamic", extent0};
using strided_view_1d_t = Kokkos::View<value_type*, Kokkos::LayoutStride>;
Kokkos::LayoutStride layout1d{extent0, 2};
strided_view_1d_t strided_view_1d{"std-algo-test-1d-strided-view", layout1d};
// Overlapping because iterators are identical
#if defined(KOKKOS_ENABLE_DEBUG)
auto first_s = KE::begin(static_view_1d);
auto last_s = first_s + extent0;
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s, last_s, first_s); },
"Kokkos contract violation:.*");
auto first_d = KE::begin(dynamic_view_1d);
auto last_d = first_d + extent0;
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d, last_d, first_d); },
"Kokkos contract violation:.*");
auto first_st = KE::begin(strided_view_1d);
auto last_st = first_st + extent0;
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_st, last_st, first_st); },
"Kokkos contract violation:.*");
#endif
// Ranges are overlapped
static constexpr size_t sub_extent0 = 6, offset0 = 3;
std::pair<size_t, size_t> range0(0, sub_extent0),
range1(offset0, offset0 + sub_extent0);
#if defined(KOKKOS_ENABLE_DEBUG)
auto static_view_1d_0 = Kokkos::subview(static_view_1d, range0);
auto static_view_1d_1 = Kokkos::subview(static_view_1d, range1);
auto first_s0 = KE::begin(static_view_1d_0); // [0, 6)
auto last_s0 = first_s0 + static_view_1d_0.extent(0);
auto first_s1 = KE::begin(static_view_1d_1); // [3, 9)
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s0, last_s0, first_s1); },
"Kokkos contract violation:.*");
auto dynamic_view_1d_0 = Kokkos::subview(dynamic_view_1d, range0);
auto dynamic_view_1d_1 = Kokkos::subview(dynamic_view_1d, range1);
auto first_d0 = KE::begin(dynamic_view_1d_0); // [0, 6)
auto last_d0 = first_d0 + dynamic_view_1d_0.extent(0);
auto first_d1 = KE::begin(dynamic_view_1d_1); // [3, 9)
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d0, last_d0, first_d1); },
"Kokkos contract violation:.*");
#endif
auto strided_view_1d_0 = Kokkos::subview(strided_view_1d, range0);
auto strided_view_1d_1 = Kokkos::subview(strided_view_1d, range1);
auto first_st0 = KE::begin(strided_view_1d_0); // [0, 12)
auto last_st0 = first_st0 + strided_view_1d_0.extent(0);
auto first_st1 = KE::begin(strided_view_1d_1); // [3, 15)
// Does not overlap since offset (=3) is not divisible by stride (=2)
EXPECT_NO_THROW(
{ KE::Impl::expect_no_overlap(first_st0, last_st0, first_st1); });
// Iterating over the same range without overlapping
Kokkos::View<value_type[2][extent0], Kokkos::LayoutLeft> static_view_2d{
"std-algo-test-2d-contiguous-view-static"};
auto sub_static_view_1d_0 = Kokkos::subview(static_view_2d, 0, Kokkos::ALL);
auto sub_static_view_1d_1 = Kokkos::subview(static_view_2d, 1, Kokkos::ALL);
auto sub_first_s0 = KE::begin(sub_static_view_1d_0); // 0, 2, 4, ...
auto sub_last_s0 = sub_first_s0 + sub_static_view_1d_0.extent(0);
auto sub_first_s1 = KE::begin(sub_static_view_1d_1); // 1, 3, 5, ...
EXPECT_NO_THROW({
KE::Impl::expect_no_overlap(sub_first_s0, sub_last_s0, sub_first_s1);
});
Kokkos::View<value_type**, Kokkos::LayoutLeft> dynamic_view_2d{
"std-algo-test-2d-contiguous-view-dynamic", 2, extent0};
auto sub_dynamic_view_1d_0 = Kokkos::subview(dynamic_view_2d, 0, Kokkos::ALL);
auto sub_dynamic_view_1d_1 = Kokkos::subview(dynamic_view_2d, 1, Kokkos::ALL);
auto sub_first_d0 = KE::begin(sub_dynamic_view_1d_0); // 0, 2, 4, ...
auto sub_last_d0 = sub_first_d0 + sub_dynamic_view_1d_0.extent(0);
auto sub_first_d1 = KE::begin(sub_dynamic_view_1d_1); // 1, 3, 5, ...
EXPECT_NO_THROW({
KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1);
});
Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3};
Kokkos::View<value_type**, Kokkos::LayoutStride> strided_view_2d{
"std-algo-test-2d-contiguous-view-strided", layout2d};
auto sub_strided_view_1d_0 = Kokkos::subview(strided_view_2d, 0, Kokkos::ALL);
auto sub_strided_view_1d_1 = Kokkos::subview(strided_view_2d, 1, Kokkos::ALL);
auto sub_first_st0 = KE::begin(sub_strided_view_1d_0); // 0, 6, 12, ...
auto sub_last_st0 = sub_first_st0 + sub_strided_view_1d_0.extent(0);
auto sub_first_st1 = KE::begin(sub_strided_view_1d_1); // 1, 7, 13, ...
EXPECT_NO_THROW({
KE::Impl::expect_no_overlap(sub_first_st0, sub_last_st0, sub_first_st1);
});
}
} // namespace stdalgos
} // namespace Test

View File

@ -85,7 +85,7 @@ struct TestFunctorA {
break;
}
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
case 2: {
auto it = KE::exclusive_scan(
@ -213,7 +213,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
break;
}
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
case 2:
case 3: {
auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom),
@ -242,7 +242,7 @@ template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
for (int apiId : {0, 1, 2, 3}) {
#else
for (int apiId : {0, 1}) {

View File

@ -52,7 +52,7 @@ struct TestFunctorA {
Kokkos::single(Kokkos::PerTeam(member),
[=, *this]() { m_returnsView(myRowIndex) = result; });
}
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type;
result = KE::is_sorted(member, KE::cbegin(myRowView), KE::cend(myRowView),
@ -179,7 +179,7 @@ template <class LayoutTag, class ValueType>
void run_all_scenarios(bool makeDataSortedOnPurpose) {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 5153}) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
for (int apiId : {0, 1, 2, 3}) {
#else
for (int apiId : {0, 1}) {

View File

@ -73,7 +73,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex) = resultDist;
});
}
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type;
auto it = KE::is_sorted_until(member, KE::cbegin(myRowView),
@ -226,7 +226,7 @@ template <class LayoutTag, class ValueType>
void run_all_scenarios(const std::string& name, const std::vector<int>& cols) {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : cols) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
for (int apiId : {0, 1, 2, 3}) {
#else
for (int apiId : {0, 1}) {

View File

@ -59,7 +59,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex) = resultDist;
});
}
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type;
auto it =
@ -170,7 +170,7 @@ void run_all_scenarios() {
}
TEST(std_algorithms_max_element_team_test, test) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, double>();
run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -59,7 +59,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex) = resultDist;
});
}
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type;
auto it =
@ -169,7 +169,7 @@ void run_all_scenarios() {
}
TEST(std_algorithms_min_element_team_test, test) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, double>();
run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -66,7 +66,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex, 1) = resultDist2;
});
}
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type;
auto itPair =
@ -188,7 +188,7 @@ void run_all_scenarios() {
}
TEST(std_algorithms_minmax_element_team_test, test) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, double>();
run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test {
namespace stdalgos {

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test {
namespace stdalgos {

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test {
namespace stdalgos {

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test {
namespace stdalgos {

View File

@ -5,6 +5,6 @@ build_script:
- cmd: >-
mkdir build &&
cd build &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF &&
cmake c:\projects\source -DKokkos_ENABLE_IMPL_MDSPAN=OFF -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF &&
cmake --build . --target install &&
ctest -C Debug --output-on-failure

View File

@ -4,7 +4,7 @@ KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(view_copy_constructor)
#FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow.
IF(NOT Kokkos_ENABLE_OPENMPTARGET)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance)

View File

@ -0,0 +1,4 @@
KOKKOS_ADD_EXECUTABLE(
view_copy_constructor
SOURCES view_copy_constructor.cpp
)

View File

@ -0,0 +1,46 @@
KOKKOS_DEVICES=Serial
KOKKOS_ARCH = ""
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
ifndef KOKKOS_PATH
KOKKOS_PATH = $(MAKEFILE_PATH)../..
endif
SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
vpath %.cpp $(sort $(dir $(SRC)))
default: build
echo "Start Build"
CXX = clang++
EXE = view_copy_constructor.exe
CXXFLAGS ?= -Ofast
override CXXFLAGS += -I$(MAKEFILE_PATH)
DEPFLAGS = -M
LINK = ${CXX}
LINKFLAGS = -Ofast
KOKKOS_CXX_STANDARD=c++20
OBJ = $(notdir $(SRC:.cpp=.o))
LIB =
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
clean: kokkos-clean
rm -f *.o view_copy_constructor.cuda view_copy_constructor.exe
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)

View File

@ -0,0 +1,310 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
// The function "test_view_collection" exposes the copy constructor
// and destructor overheads in Kokkos View objects
// Please see the lines marked by "NOTE".
#include <limits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sys/time.h>
#include <Kokkos_Core.hpp>
#include <iostream>
// NVIEWS is the number of Kokkos View objects in our ViewCollection object
// We have chosen a large value of 40 to make it easier to see performance
// differences when using the likelihood attribute
#define NVIEWS 40
class ViewCollection {
public:
Kokkos::View<double*> v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28,
v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40;
double m_expected_sum;
double m_side_effect;
int m_N;
ViewCollection(int N)
: v1("v1", N),
v2("v2", N),
v3("v3", N),
v4("v4", N),
v5("v5", N),
v6("v6", N),
v7("v7", N),
v8("v8", N),
v9("v9", N),
v10("v10", N),
v11("v11", N),
v12("v12", N),
v13("v13", N),
v14("v14", N),
v15("v15", N),
v16("v16", N),
v17("v17", N),
v18("v18", N),
v19("v19", N),
v20("v20", N),
v21("v21", N),
v22("v22", N),
v23("v23", N),
v24("v24", N),
v25("v25", N),
v26("v26", N),
v27("v27", N),
v28("v28", N),
v29("v29", N),
v30("v30", N),
v31("v31", N),
v32("v32", N),
v33("v33", N),
v34("v34", N),
v35("v35", N),
v36("v36", N),
v37("v37", N),
v38("v38", N),
v39("v39", N),
v40("v40", N),
m_expected_sum(N * NVIEWS),
m_side_effect(0.0),
m_N(N) {
for (int i = 0; i < N; ++i) {
v1(i) = 1;
v2(i) = 1;
v3(i) = 1;
v4(i) = 1;
v5(i) = 1;
v6(i) = 1;
v7(i) = 1;
v8(i) = 1;
v9(i) = 1;
v10(i) = 1;
v11(i) = 1;
v12(i) = 1;
v13(i) = 1;
v14(i) = 1;
v15(i) = 1;
v16(i) = 1;
v17(i) = 1;
v18(i) = 1;
v19(i) = 1;
v20(i) = 1;
v21(i) = 1;
v22(i) = 1;
v23(i) = 1;
v24(i) = 1;
v25(i) = 1;
v26(i) = 1;
v27(i) = 1;
v28(i) = 1;
v29(i) = 1;
v30(i) = 1;
v31(i) = 1;
v32(i) = 1;
v33(i) = 1;
v34(i) = 1;
v35(i) = 1;
v36(i) = 1;
v37(i) = 1;
v38(i) = 1;
v39(i) = 1;
v40(i) = 1;
}
}
// The ADD_COPY_CONSTRUCTOR macro is helpful to compare time in the copy
// constructor between compilers. We have found that the GNU compiler
// is sometimes able to inline the default copy constructor.
#ifdef ADD_COPY_CONSTRUCTOR
__attribute__((noinline)) ViewCollection(const ViewCollection& other)
: v1(other.v1),
v2(other.v2),
v3(other.v3),
v4(other.v4),
v5(other.v5),
v6(other.v6),
v7(other.v7),
v8(other.v8),
v9(other.v9),
v10(other.v10),
v11(other.v11),
v12(other.v12),
v13(other.v13),
v14(other.v14),
v15(other.v15),
v16(other.v16),
v17(other.v17),
v18(other.v18),
v19(other.v19),
v20(other.v20),
v21(other.v21),
v22(other.v22),
v23(other.v23),
v24(other.v24),
v25(other.v25),
v26(other.v26),
v27(other.v27),
v28(other.v28),
v29(other.v29),
v30(other.v30),
v31(other.v31),
v32(other.v32),
v33(other.v33),
v34(other.v34),
v35(other.v35),
v36(other.v36),
v37(other.v37),
v38(other.v38),
v39(other.v39),
v40(other.v40),
m_expected_sum(other.m_expected_sum),
m_side_effect(other.m_side_effect),
m_N(other.m_N) {}
#endif
KOKKOS_INLINE_FUNCTION
double sum_views(int ii, bool execute_kernel) {
double result = 0.0;
if (execute_kernel) {
// This code is only executed when using the command line option -k
// The computation references all Kokkos views. This may help our
// effort to stop compilers from optimizing away the Kokkos views
for (int i = 0; i < m_N; ++i) {
result += v1(i) + v2(i) + v3(i) + v4(i) + v5(i) + v6(i) + v7(i) +
v8(i) + v9(i) + v10(i) + v11(i) + v12(i) + v13(i) + v14(i) +
v15(i) + v16(i) + v17(i) + v18(i) + v19(i) + v20(i) + v21(i) +
v22(i) + v23(i) + v24(i) + v25(i) + v26(i) + v27(i) + v28(i) +
v29(i) + v30(i) + v31(i) + v32(i) + v33(i) + v34(i) + v35(i) +
v36(i) + v37(i) + v38(i) + v39(i) + v40(i);
}
} else {
result = m_expected_sum;
}
// This statement introduces a side effect that may help our effort to
// stop compilers from optimizing away the temporary ViewCollection object
m_side_effect = result * (ii + 1);
return result;
}
};
void test_view_collection_kk(int N, int num_iter, bool execute_kernel) {
ViewCollection view_collection(N);
Kokkos::Timer view_collection_timer;
double max_value = 0.0;
// Max Reduction boilerplate code taken from slide 53 of
// kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf
Kokkos::parallel_reduce(
"collection-reduction", num_iter,
KOKKOS_LAMBDA(int i, double& valueToUpdate) {
// NOTE: The following lines expose the Kokkos View overheads
ViewCollection tmp_view_collection = view_collection;
double my_value = tmp_view_collection.sum_views(i, execute_kernel);
if (my_value > valueToUpdate) valueToUpdate = my_value;
},
Kokkos::Max<double>(max_value));
double view_collection_time = view_collection_timer.seconds();
bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6;
std::cout << "View Time = " << view_collection_time << " seconds"
<< std::endl;
if (success) {
std::cout << "Kokkos run:" << std::endl;
std::cout << "SUCCESS" << std::endl;
} else {
std::cout << "FAILURE" << std::endl;
}
}
void test_view_collection_serial(int N, int num_iter, bool execute_kernel) {
ViewCollection view_collection(N);
Kokkos::Timer view_collection_timer;
double max_value = 0.0;
// Max Reduction boilerplate code taken from slide 53 of
// kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf
for (int i = 0; i < num_iter; ++i) {
// NOTE: The following lines expose the Kokkos View overheads
ViewCollection tmp_view_collection = view_collection;
double my_value = tmp_view_collection.sum_views(i, execute_kernel);
if (my_value > max_value) max_value = my_value;
}
double view_collection_time = view_collection_timer.seconds();
bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6;
std::cout << "View Time 2 = " << view_collection_time << " seconds"
<< std::endl;
if (success) {
std::cout << "Serial run:" << std::endl;
std::cout << "SUCCESS" << std::endl;
} else {
std::cout << "FAILURE" << std::endl;
}
}
int main(int argc, char* argv[]) {
// The benchmark is only testing reference counting for views on host.
#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_SERIAL) || \
defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_HPX)
int N = 1;
int num_iter = 1 << 27;
bool execute_kernel = false;
for (int i = 0; i < argc; i++) {
if ((strcmp(argv[i], "-N") == 0)) {
N = atoi(argv[++i]);
if (N < 1) {
std::cout << "Array extent must be >= 1" << std::endl;
exit(1);
}
} else if (strcmp(argv[i], "-i") == 0) {
num_iter = atoi(argv[++i]);
if (num_iter < 1) {
std::cout << "Number of iterations must be >= 1" << std::endl;
exit(1);
}
} else if (strcmp(argv[i], "-k") == 0) {
execute_kernel = true;
} else if ((strcmp(argv[i], "-h") == 0)) {
printf(" Options:\n");
printf(" -N <int>: Array extent\n");
printf(" -i <int>: Number of iterations\n");
printf(" -k: Execute the summation kernel\n");
printf(" -h: Print this message\n\n");
exit(1);
}
}
std::cout << "Array extent = " << N << std::endl;
std::cout << "Iterations = " << num_iter << std::endl;
std::cout << "Execute summation kernel = " << std::boolalpha << execute_kernel
<< std::noboolalpha << std::endl;
// Test inside a Kokkos kernel.
Kokkos::initialize(argc, argv);
{ test_view_collection_kk(N, num_iter, execute_kernel); }
// Test outside Kokkos kernel.
test_view_collection_serial(N, num_iter, execute_kernel);
Kokkos::finalize();
#endif
return 0;
}

View File

@ -233,7 +233,7 @@ do
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args
--extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler)
--extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler|--disable-warnings)
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument

View File

@ -1,6 +1,5 @@
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib
TEST_OPTIONAL_TPLS CUSPARSE
)
TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib)

View File

@ -225,8 +225,13 @@ FUNCTION(kokkos_compilation)
# if built w/o CUDA support, we want to basically make this a no-op
SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@)
IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
SET(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..")
ENDIF()
# search relative first and then absolute
SET(_HINTS "${CMAKE_CURRENT_LIST_DIR}/../.." "@CMAKE_INSTALL_PREFIX@")
SET(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@")
# find kokkos_launch_compiler
FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER

View File

@ -37,6 +37,7 @@
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
@ -52,6 +53,8 @@
#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated
#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION
#cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN
#cmakedefine KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY
#cmakedefine KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND
#cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS
/* TPL Settings */
@ -65,6 +68,7 @@
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
#cmakedefine KOKKOS_ARCH_ARMV81
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2
#cmakedefine KOKKOS_ARCH_ARMV9_GRACE
#cmakedefine KOKKOS_ARCH_A64FX
#cmakedefine KOKKOS_ARCH_AVX
#cmakedefine KOKKOS_ARCH_AVX2
@ -116,7 +120,6 @@
#cmakedefine KOKKOS_ARCH_AMD_GFX942
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
#cmakedefine KOKKOS_ARCH_AMD_GFX1100
#cmakedefine KOKKOS_ARCH_AMD_GFX1103
#cmakedefine KOKKOS_ARCH_AMD_GPU
#cmakedefine KOKKOS_ARCH_VEGA // deprecated
#cmakedefine KOKKOS_ARCH_VEGA906 // deprecated

View File

@ -7,37 +7,38 @@ IF (NOT CUDAToolkit_ROOT)
ENDIF()
ENDIF()
# FIXME CMake 3.28.4 creates more targets than we export
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0" AND CMAKE_VERSION VERSION_LESS "3.28.4")
find_package(CUDAToolkit)
ELSE()
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.20.1")
MESSAGE(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
ENDIF()
IF (TARGET CUDA::cudart)
SET(FOUND_CUDART TRUE)
KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart)
ELSE()
SET(FOUND_CUDART FALSE)
ENDIF()
IF (TARGET CUDA::cuda_driver)
SET(FOUND_CUDA_DRIVER TRUE)
KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver)
ELSE()
SET(FOUND_CUDA_DRIVER FALSE)
ENDIF()
include(FindPackageHandleStandardArgs)
IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
SET(KOKKOS_CUDA_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
ELSE()
SET(KOKKOS_CUDA_ERROR DEFAULT_MSG)
ENDIF()
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${KOKKOS_CUDA_ERROR} FOUND_CUDART FOUND_CUDA_DRIVER)
IF (FOUND_CUDA_DRIVER AND FOUND_CUDART)
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
find_package(CUDAToolkit REQUIRED)
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
)
KOKKOS_EXPORT_CMAKE_TPL(CUDAToolkit REQUIRED)
ELSE()
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
IF (TARGET CUDA::cudart)
SET(FOUND_CUDART TRUE)
KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart)
ELSE()
SET(FOUND_CUDART FALSE)
ENDIF()
IF (TARGET CUDA::cuda_driver)
SET(FOUND_CUDA_DRIVER TRUE)
KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver)
ELSE()
SET(FOUND_CUDA_DRIVER FALSE)
ENDIF()
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${DEFAULT_MSG} FOUND_CUDART FOUND_CUDA_DRIVER)
IF (FOUND_CUDA_DRIVER AND FOUND_CUDART)
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
)
ENDIF()
ENDIF()

View File

@ -35,7 +35,6 @@ IF(NOT _CUDA_FAILURE)
GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS)
GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY})
KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
ELSE()
SET(TPL_ENABLE_CUDA OFF)
ENDIF()

View File

@ -1,26 +0,0 @@
#@HEADER
# ************************************************************************
#
# Kokkos v. 4.0
# Copyright (2022) National Technology & Engineering
# Solutions of Sandia, LLC (NTESS).
#
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
#
# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
#
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ************************************************************************
# @HEADER
#include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
#IF (TPL_ENABLE_CUDA)
# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
# KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
#ENDIF()

View File

@ -118,14 +118,6 @@ FUNCTION(KOKKOS_ADD_TEST)
ENDIF()
ENDFUNCTION()
FUNCTION(KOKKOS_ADD_ADVANCED_TEST)
if (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_ADVANCED_TEST(${ARGN})
else()
# TODO Write this
endif()
ENDFUNCTION()
MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME)
ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME})
TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES})

View File

@ -28,6 +28,7 @@ KOKKOS_CHECK_DEPRECATED_OPTIONS(
#-------------------------------------------------------------------------------
SET(KOKKOS_ARCH_LIST)
include(CheckCXXCompilerFlag)
KOKKOS_DEPRECATED_LIST(ARCH ARCH)
@ -49,6 +50,7 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU")
DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support")
DECLARE_AND_CHECK_HOST_ARCH(ARMV9_GRACE "ARMv9 NVIDIA Grace CPU")
DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs")
DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs")
DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs")
@ -101,9 +103,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)
LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906)
LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030)
LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800)
LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030)
#FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
@ -189,12 +191,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
ELSEIF(CUDAToolkit_BIN_DIR)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..)
ENDIF()
ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
SET(CUDA_ARCH_FLAG "-gpu")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda)
IF (KOKKOS_ENABLE_CUDA) # FIXME ideally unreachable when CUDA not enabled
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -cuda)
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
SET(CUDA_ARCH_FLAG "-arch")
ENDIF()
@ -209,6 +205,11 @@ ENDIF()
#------------------------------- KOKKOS_HIP_OPTIONS ---------------------------
KOKKOS_OPTION(IMPL_AMDGPU_FLAGS "" STRING "Set compiler flags for AMD GPUs")
KOKKOS_OPTION(IMPL_AMDGPU_LINK "" STRING "Set linker flags for AMD GPUs")
MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_FLAGS)
MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_LINK)
#clear anything that might be in the cache
GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS)
IF(KOKKOS_ENABLE_HIP)
@ -301,6 +302,20 @@ IF (KOKKOS_ARCH_A64FX)
)
ENDIF()
IF (KOKKOS_ARCH_ARMV9_GRACE)
SET(KOKKOS_ARCH_ARM_NEON ON)
check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2)
check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS)
IF (COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128
)
ELSE()
MESSAGE(WARNING "Compiler does not support ARMv9 Grace architecture")
ENDIF()
ENDIF()
IF (KOKKOS_ARCH_ZEN)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
@ -535,17 +550,17 @@ IF (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
SET(KOKKOS_ARCH_AVX512XEON OFF)
ENDIF()
# FIXME_NVCC nvcc doesn't seem to support Arm Neon.
IF(KOKKOS_ARCH_ARM_NEON AND KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
UNSET(KOKKOS_ARCH_ARM_NEON)
ENDIF()
IF (NOT KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA)
IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
COMPILER_SPECIFIC_FLAGS(
Clang -fcuda-rdc
NVIDIA --relocatable-device-code=true
NVHPC -gpu=rdc
)
ELSEIF(KOKKOS_ENABLE_CUDA)
COMPILER_SPECIFIC_FLAGS(
NVHPC -gpu=nordc
)
ENDIF()
ENDIF()
@ -571,7 +586,7 @@ IF (KOKKOS_ENABLE_HIP)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fgpu-rdc
)
IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC AND NOT KOKKOS_IMPL_AMDGPU_FLAGS)
COMPILER_SPECIFIC_LINK_OPTIONS(
DEFAULT --hip-link
)
@ -654,15 +669,9 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
IF(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
SET(CMAKE_CUDA_ARCHITECTURES ${KOKKOS_CUDA_ARCHITECTURES} PARENT_SCOPE)
ELSE()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${FLAG})
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}")
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}")
ELSE()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
ENDIF()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
ENDIF()
ENDIF()
ENDIF()
@ -704,14 +713,16 @@ FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG)
MESSAGE(WARNING "Given AMD GPU architecture ${ARCH}, but Kokkos_ENABLE_HIP, Kokkos_ENABLE_SYCL, Kokkos_ENABLE_OPENACC, and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.")
UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE)
ELSE()
IF(KOKKOS_ENABLE_HIP)
SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE)
ENDIF()
SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE)
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
ENDIF()
IF(KOKKOS_ENABLE_HIP)
SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE)
ENDIF()
IF(NOT KOKKOS_IMPL_AMDGPU_FLAGS)
SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE)
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
ENDIF()
IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
ENDIF()
ENDIF()
ENDIF()
ENDFUNCTION()
@ -724,6 +735,15 @@ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
CHECK_AMDGPU_ARCH(${ARCH} ${FLAG})
ENDFOREACH()
IF(KOKKOS_IMPL_AMDGPU_FLAGS)
IF (NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
MESSAGE(FATAL_ERROR "When IMPL_AMDGPU_FLAGS is set the architecture autodectection is disabled. "
"Please explicitly set the GPU architecture.")
ENDIF()
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${KOKKOS_IMPL_AMDGPU_FLAGS}")
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${KOKKOS_IMPL_AMDGPU_LINK}")
ENDIF()
MACRO(SET_AND_CHECK_AMD_ARCH ARCH FLAG)
KOKKOS_SET_OPTION(ARCH_${ARCH} ON)
CHECK_AMDGPU_ARCH(${ARCH} ${FLAG})
@ -984,7 +1004,7 @@ IF (KOKKOS_ARCH_HOPPER90)
ENDIF()
#HIP detection of gpu arch
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED AND NOT KOKKOS_IMPL_AMDGPU_FLAGS)
FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator)
IF(NOT ROCM_ENUMERATOR)
MESSAGE(FATAL_ERROR "Autodetection of AMD GPU architecture not possible as "

View File

@ -42,12 +42,8 @@ IF(Kokkos_ENABLE_CUDA)
# If launcher was found and nvcc_wrapper was not specified as
# compiler and `CMAKE_CXX_COMPILIER_LAUNCHER` is not set, set to use launcher.
# Will ensure CMAKE_CXX_COMPILER is replaced by nvcc_wrapper
IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang
AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
IF(CMAKE_CXX_COMPILER_LAUNCHER)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
MESSAGE(STATUS "Using nvc++ as device compiler requires Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON!")
ENDIF()
MESSAGE(FATAL_ERROR "Cannot use CMAKE_CXX_COMPILER_LAUNCHER if the CMAKE_CXX_COMPILER is not able to compile CUDA code, i.e. nvcc_wrapper or clang++!")
ENDIF()
# the first argument to launcher is always the C++ compiler defined by cmake
@ -149,56 +145,85 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Fujitsu)
ENDIF()
# Enforce the minimum compilers supported by Kokkos.
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) 8.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) 10.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) 15.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 8.2.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 19.0.5 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) 2021.1.1 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) 2023.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 11.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 5.2.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI 22.3 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC 19.29 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported")
IF(NOT CMAKE_CXX_STANDARD)
SET(CMAKE_CXX_STANDARD 17)
ENDIF()
IF(CMAKE_CXX_STANDARD EQUAL 17)
SET(KOKKOS_CLANG_CPU_MINIMUM 8.0.0)
SET(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0)
SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
SET(KOKKOS_GCC_MINIMUM 8.2.0)
SET(KOKKOS_INTEL_MINIMUM 19.0.5)
SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1)
SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
SET(KOKKOS_NVCC_MINIMUM 11.0.0)
SET(KOKKOS_HIPCC_MINIMUM 5.2.0)
SET(KOKKOS_NVHPC_MINIMUM 22.3)
SET(KOKKOS_MSVC_MINIMUM 19.29)
ELSE()
SET(KOKKOS_CLANG_CPU_MINIMUM 14.0.0)
SET(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0)
SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
SET(KOKKOS_GCC_MINIMUM 10.1.0)
SET(KOKKOS_INTEL_MINIMUM "not supported")
SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
SET(KOKKOS_NVCC_MINIMUM 12.0.0)
SET(KOKKOS_HIPCC_MINIMUM 5.2.0)
SET(KOKKOS_NVHPC_MINIMUM 22.3)
SET(KOKKOS_MSVC_MINIMUM 19.30)
ENDIF()
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos for C++${CMAKE_CXX_STANDARD}. Required minimum compiler versions:")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) ${KOKKOS_CLANG_CPU_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel ${KOKKOS_INTEL_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC ${KOKKOS_HIPCC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI ${KOKKOS_NVHPC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC ${KOKKOS_MSVC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\nCompiler: ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION}\n")
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT Kokkos_ENABLE_CUDA)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.0.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CPU_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_CUDA)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CUDA_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.2.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_GCC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.0.5)
IF((NOT CMAKE_CXX_STANDARD EQUAL 17) OR (KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_MINIMUM}))
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2021.1.1)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND Kokkos_ENABLE_SYCL)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2023.0.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.0.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVCC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 5.2.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_HIPCC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 22.3)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVHPC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
# Treat PGI internally as NVHPC to simplify handling both compilers.
@ -206,13 +231,13 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NV
# backward-compatible to pgc++.
SET(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.29)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_MSVC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS KOKKOS_CLANG_OPENMPTARGET_MINIMUM)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ENDIF()

View File

@ -48,6 +48,8 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
# resolved but we keep the option around a bit longer to be safe.
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA")
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
@ -75,8 +77,12 @@ KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified me
# This option will go away eventually, but allows fallback to old implementation when needed.
KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation")
KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases")
KOKKOS_ENABLE_OPTION(IMPL_REF_COUNT_BRANCH_UNLIKELY ON "Whether to use the C++20 `[[unlikely]]` attribute in the view reference counting")
mark_as_advanced(Kokkos_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY)
KOKKOS_ENABLE_OPTION(IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND OFF "Whether to enable a workaround for invalid use of View of Views that causes program hang on destruction.")
mark_as_advanced(Kokkos_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND)
KOKKOS_ENABLE_OPTION(IMPL_MDSPAN OFF "Whether to enable experimental mdspan support")
KOKKOS_ENABLE_OPTION(IMPL_MDSPAN ON "Whether to enable experimental mdspan support")
KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan")
KOKKOS_ENABLE_OPTION(IMPL_SKIP_COMPILER_MDSPAN ON BOOL "Whether to use an internal version of mdspan even if the compiler supports mdspan")
mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN)
@ -131,7 +137,7 @@ FUNCTION(check_device_specific_options)
ENDIF()
ENDFUNCTION()
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC IMPL_CUDA_UNIFIED_MEMORY)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH)

View File

@ -709,7 +709,12 @@ MACRO(kokkos_find_imported NAME)
ENDIF()
IF (NOT TPL_LIBRARY_SUFFIXES)
SET(TPL_LIBRARY_SUFFIXES lib lib64)
SET(TPL_LIBRARY_SUFFIXES lib)
IF(KOKKOS_IMPL_32BIT)
LIST(APPEND TPL_LIBRARY_SUFFIXES lib32)
ELSE()
LIST(APPEND TPL_LIBRARY_SUFFIXES lib64)
ENDIF()
ENDIF()
SET(${NAME}_INCLUDE_DIRS)

View File

@ -124,12 +124,8 @@ IF(KOKKOS_ENABLE_CUDA)
ELSEIF(CMAKE_CXX_EXTENSIONS)
MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF")
ENDIF()
ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. To allow nvc++ as Cuda compiler, Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON must be set!")
ELSE()
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or NVC++ or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}")
ENDIF()
ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}")
ENDIF()
ENDIF()

View File

@ -103,13 +103,19 @@ if (Kokkos_ENABLE_IMPL_MDSPAN AND Kokkos_ENABLE_MDSPAN_EXTERNAL)
endif()
IF (Kokkos_ENABLE_OPENMP)
find_package(OpenMP REQUIRED)
find_package(OpenMP REQUIRED COMPONENTS CXX)
# FIXME_TRILINOS Trilinos doesn't allow for Kokkos to use find_dependency
# so we just append the flags here instead of linking with the OpenMP target.
IF(KOKKOS_HAS_TRILINOS)
COMPILER_SPECIFIC_FLAGS(DEFAULT ${OpenMP_CXX_FLAGS})
ELSE()
KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED)
KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED COMPONENTS CXX)
ENDIF()
IF(Kokkos_ENABLE_HIP AND KOKKOS_COMPILE_LANGUAGE STREQUAL HIP)
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS ${OpenMP_CXX_FLAGS})
ENDIF()
IF(Kokkos_ENABLE_CUDA AND KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -Xcompiler ${OpenMP_CXX_FLAGS})
ENDIF()
ENDIF()

View File

@ -160,6 +160,12 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
)
ENDIF()
ENDIF()
# We noticed problems with -fvisibility=hidden for inline static variables
# if Kokkos was built as shared library.
IF(BUILD_SHARED_LIBS)
SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY VISIBILITY_INLINES_HIDDEN ON)
SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY CXX_VISIBILITY_PRESET hidden)
ENDIF()
ENDFUNCTION()
FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
@ -241,34 +247,6 @@ MACRO(KOKKOS_CONFIGURE_CORE)
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}")
SET(_DEFAULT_HOST_MEMSPACE "::Kokkos::HostSpace")
KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space")
KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space")
KOKKOS_OPTION(DEFAULT_DEVICE_EXECUTION_SPACE "" STRING "Override default device execution space")
KOKKOS_OPTION(DEFAULT_HOST_PARALLEL_EXECUTION_SPACE "" STRING "Override default host parallel execution space")
IF (NOT Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE STREQUAL "")
SET(_DEVICE_PARALLEL ${Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE})
MESSAGE(STATUS "Override default device execution space: ${_DEVICE_PARALLEL}")
SET(KOKKOS_DEVICE_SPACE_ACTIVE ON)
ELSE()
IF (_DEVICE_PARALLEL STREQUAL "NoTypeDefined")
SET(KOKKOS_DEVICE_SPACE_ACTIVE OFF)
ELSE()
SET(KOKKOS_DEVICE_SPACE_ACTIVE ON)
ENDIF()
ENDIF()
IF (NOT Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE STREQUAL "")
SET(_HOST_PARALLEL ${Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE})
MESSAGE(STATUS "Override default host parallel execution space: ${_HOST_PARALLEL}")
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON)
ELSE()
IF (_HOST_PARALLEL STREQUAL "NoTypeDefined")
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE OFF)
ELSE()
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON)
ENDIF()
ENDIF()
#We are ready to configure the header
CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
ENDMACRO()
@ -484,15 +462,10 @@ ENDFUNCTION()
FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET)
IF(KOKKOS_HAS_TRILINOS)
#ignore the target, tribits doesn't do anything directly with targets
TRIBITS_INCLUDE_DIRECTORIES(${ARGN})
ELSE() #append to a list for later
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
FOREACH(DIR ${ARGN})
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $<BUILD_INTERFACE:${DIR}>)
ENDFOREACH()
ENDIF()
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
FOREACH(DIR ${ARGN})
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $<BUILD_INTERFACE:${DIR}>)
ENDFOREACH()
ENDFUNCTION()
FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET)

View File

@ -1,26 +0,0 @@
#@HEADER
# ************************************************************************
#
# Kokkos v. 4.0
# Copyright (2022) National Technology & Engineering
# Solutions of Sandia, LLC (NTESS).
#
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
#
# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
#
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#@HEADER
# Check for CUDA support
IF (NOT TPL_ENABLE_CUDA)
MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA")
ELSE()
GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
ENDIF()

View File

@ -944,13 +944,13 @@ class DualView : public ViewTraits<DataType, Properties...> {
if (sizeMismatch) {
::Kokkos::realloc(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7);
if (alloc_prop_input::initialize) {
if constexpr (alloc_prop_input::initialize) {
h_view = create_mirror_view(typename t_host::memory_space(), d_view);
} else {
h_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_host::memory_space(), d_view);
}
} else if (alloc_prop_input::initialize) {
} else if constexpr (alloc_prop_input::initialize) {
if constexpr (alloc_prop_input::has_execution_space) {
const auto& exec_space =
Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop);
@ -1038,12 +1038,10 @@ class DualView : public ViewTraits<DataType, Properties...> {
/* Resize on Device */
if (sizeMismatch) {
::Kokkos::resize(properties, d_view, n0, n1, n2, n3, n4, n5, n6, n7);
if (alloc_prop_input::initialize) {
h_view = create_mirror_view(typename t_host::memory_space(), d_view);
} else {
h_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_host::memory_space(), d_view);
}
// this part of the lambda was relocated in a method as it contains a
// `if constexpr`. In some cases, both branches were evaluated
// leading to a compile error
resync_host(properties);
/* Mark Device copy as modified */
++modified_flags(1);
@ -1054,13 +1052,10 @@ class DualView : public ViewTraits<DataType, Properties...> {
/* Resize on Host */
if (sizeMismatch) {
::Kokkos::resize(properties, h_view, n0, n1, n2, n3, n4, n5, n6, n7);
if (alloc_prop_input::initialize) {
d_view = create_mirror_view(typename t_dev::memory_space(), h_view);
} else {
d_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_dev::memory_space(), h_view);
}
// this part of the lambda was relocated in a method as it contains a
// `if constexpr`. In some cases, both branches were evaluated
// leading to a compile error
resync_device(properties);
/* Mark Host copy as modified */
++modified_flags(0);
@ -1099,6 +1094,39 @@ class DualView : public ViewTraits<DataType, Properties...> {
}
}
private:
// resync host mirror from device
// this code was relocated from a lambda as it contains a `if constexpr`.
// In some cases, both branches were evaluated, leading to a compile error
template <class... ViewCtorArgs>
inline void resync_host(Impl::ViewCtorProp<ViewCtorArgs...> const&) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
if constexpr (alloc_prop_input::initialize) {
h_view = create_mirror_view(typename t_host::memory_space(), d_view);
} else {
h_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_host::memory_space(), d_view);
}
}
// resync device mirror from host
// this code was relocated from a lambda as it contains a `if constexpr`
// In some cases, both branches were evaluated leading to a compile error
template <class... ViewCtorArgs>
inline void resync_device(Impl::ViewCtorProp<ViewCtorArgs...> const&) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
if constexpr (alloc_prop_input::initialize) {
d_view = create_mirror_view(typename t_dev::memory_space(), h_view);
} else {
d_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_dev::memory_space(), h_view);
}
}
public:
void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,

View File

@ -1657,8 +1657,7 @@ KOKKOS_FUNCTION auto as_view_of_rank_n(
if constexpr (std::is_same_v<decltype(layout), Kokkos::LayoutLeft> ||
std::is_same_v<decltype(layout), Kokkos::LayoutRight> ||
std::is_same_v<decltype(layout), Kokkos::LayoutStride> ||
is_layouttiled<decltype(layout)>::value) {
std::is_same_v<decltype(layout), Kokkos::LayoutStride>) {
for (int i = N; i < 7; ++i)
layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG;
}
@ -1933,254 +1932,155 @@ struct MirrorDRVType {
} // namespace Impl
namespace Impl {
// create a mirror
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline typename DynRankView<T, P...>::HostMirror create_mirror(
const DynRankView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using src_type = DynRankView<T, P...>;
using dst_type = typename src_type::HostMirror;
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
!alloc_prop_input::has_label,
"The view constructor arguments passed to Kokkos::create_mirror "
"must not include a label!");
static_assert(
!alloc_prop_input::has_pointer,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not include a pointer!");
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
inline auto create_mirror(const DynRankView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank()));
}
if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
using dst_type = typename Impl::MirrorDRVType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type;
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(
const DynRankView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using dst_type = typename Impl::MirrorDRVType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type;
return dst_type(prop_copy,
Impl::reconstructLayout(src.layout(), src.rank()));
} else {
using src_type = DynRankView<T, P...>;
using dst_type = typename src_type::HostMirror;
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
!alloc_prop_input::has_label,
"The view constructor arguments passed to Kokkos::create_mirror "
"must not include a label!");
static_assert(
!alloc_prop_input::has_pointer,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not include a pointer!");
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank()));
return dst_type(prop_copy,
Impl::reconstructLayout(src.layout(), src.rank()));
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
} // namespace Impl
// Create a mirror in host space
template <class T, class... P>
inline typename DynRankView<T, P...>::HostMirror create_mirror(
const DynRankView<T, P...>& src,
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize,
void>::value>* = nullptr) {
return Impl::create_mirror(src, Kokkos::Impl::ViewCtorProp<>{});
// public interface
template <class T, class... P,
class Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(const DynRankView<T, P...>& src) {
return Impl::create_mirror(src, Kokkos::view_alloc());
}
template <class T, class... P>
inline typename DynRankView<T, P...>::HostMirror create_mirror(
Kokkos::Impl::WithoutInitializing_t wi, const DynRankView<T, P...>& src,
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize,
void>::value>* = nullptr) {
// public interface that accepts a without initializing flag
template <class T, class... P,
class Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi,
const DynRankView<T, P...>& src) {
return Impl::create_mirror(src, Kokkos::view_alloc(wi));
}
template <class T, class... P, class... ViewCtorArgs>
inline typename DynRankView<T, P...>::HostMirror create_mirror(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const DynRankView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) {
return Impl::create_mirror(src, arg_prop);
}
// Create a mirror in a new space
// public interface that accepts a space
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<
class Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror(
const Space&, const Kokkos::DynRankView<T, P...>& src) {
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror(const Space&, const Kokkos::DynRankView<T, P...>& src) {
return Impl::create_mirror(
src, Kokkos::view_alloc(typename Space::memory_space{}));
}
template <class Space, class T, class... P>
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror(
Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::DynRankView<T, P...>& src,
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize,
void>::value>* = nullptr) {
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
class Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::DynRankView<T, P...>& src) {
return Impl::create_mirror(
src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
}
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const DynRankView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) {
using ReturnType = typename Impl::MirrorDRVType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type;
return ReturnType{Impl::create_mirror(src, arg_prop)};
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs,
typename Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const DynRankView<T, P...>& src) {
return Impl::create_mirror(src, arg_prop);
}
namespace Impl {
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
std::is_same<
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value,
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(const DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
}
// create a mirror view
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
!(std::is_same<
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value),
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(
inline auto create_mirror_view(
const DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
[[maybe_unused]] const typename Impl::ViewCtorProp<ViewCtorArgs...>&
arg_prop) {
if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
if constexpr (std::is_same<typename DynRankView<T, P...>::memory_space,
typename DynRankView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename DynRankView<T, P...>::data_type,
typename DynRankView<
T, P...>::HostMirror::data_type>::value) {
return typename DynRankView<T, P...>::HostMirror(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
} else {
if constexpr (Impl::MirrorDRViewType<typename Impl::ViewCtorProp<
ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
return typename Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
inline std::enable_if_t<
Kokkos::is_space<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space>::value &&
Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace,
typename Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type>
create_mirror_view(const Kokkos::DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
inline std::enable_if_t<
Kokkos::is_space<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space>::value &&
!Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace,
typename Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type>
create_mirror_view(
const Kokkos::DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
}
} // namespace Impl
// Create a mirror view in host space
// public interface
template <class T, class... P>
inline std::enable_if_t<
(std::is_same<
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value),
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
return src;
}
template <class T, class... P>
inline std::enable_if_t<
!(std::is_same<
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value),
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
return Kokkos::create_mirror(src);
inline auto create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
return Impl::create_mirror_view(src, Kokkos::view_alloc());
}
// public interface that accepts a without initializing flag
template <class T, class... P>
inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
const DynRankView<T, P...>& src) {
return Impl::create_mirror_view(src, Kokkos::view_alloc(wi));
}
// Create a mirror view in a new space
// FIXME_C++17 Improve SFINAE here.
// public interface that accepts a space
template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type
create_mirror_view(
const Space&, const Kokkos::DynRankView<T, P...>& src,
std::enable_if_t<
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) {
return src;
inline auto create_mirror_view(const Space&,
const Kokkos::DynRankView<T, P...>& src) {
return Impl::create_mirror_view(
src, Kokkos::view_alloc(typename Space::memory_space()));
}
// FIXME_C++17 Improve SFINAE here.
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type
create_mirror_view(
const Space& space, const Kokkos::DynRankView<T, P...>& src,
std::enable_if_t<
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) {
return Kokkos::create_mirror(space, src);
}
template <class Space, class T, class... P>
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
const Space&,
const Kokkos::DynRankView<T, P...>& src) {
@ -2188,6 +2088,8 @@ inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
src, Kokkos::view_alloc(typename Space::memory_space{}, wi));
}
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror_view(
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
@ -2195,75 +2097,51 @@ inline auto create_mirror_view(
return Impl::create_mirror_view(src, arg_prop);
}
template <class... ViewCtorArgs, class T, class... P>
// create a mirror view and deep copy it
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class... ViewCtorArgs, class T, class... P,
class Enable = std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>&,
const Kokkos::DynRankView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::DynRankView<T, P...>& src) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
// same behavior as deep_copy(src, src)
if (!alloc_prop_input::has_execution_space)
fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src view");
return src;
}
Impl::check_view_ctor_args_create_mirror_view_and_copy<ViewCtorArgs...>();
template <class... ViewCtorArgs, class T, class... P>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::DynRankView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
using Space = typename alloc_prop_input::memory_space;
using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type;
if constexpr (Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
// same behavior as deep_copy(src, src)
if constexpr (!alloc_prop_input::has_execution_space)
fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src "
"view");
return src;
} else {
using Space = typename alloc_prop_input::memory_space;
using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type;
auto arg_prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string{}, WithoutInitializing,
typename Space::execution_space{});
auto arg_prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string{}, WithoutInitializing,
typename Space::execution_space{});
std::string& label = Impl::get_property<Impl::LabelTag>(arg_prop_copy);
if (label.empty()) label = src.label();
auto mirror = typename Mirror::non_const_type{
arg_prop_copy, Impl::reconstructLayout(src.layout(), src.rank())};
if constexpr (alloc_prop_input::has_execution_space) {
deep_copy(Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop_copy),
mirror, src);
} else
deep_copy(mirror, src);
return mirror;
std::string& label = Impl::get_property<Impl::LabelTag>(arg_prop_copy);
if (label.empty()) label = src.label();
auto mirror = typename Mirror::non_const_type{
arg_prop_copy, Impl::reconstructLayout(src.layout(), src.rank())};
if constexpr (alloc_prop_input::has_execution_space) {
deep_copy(Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop_copy),
mirror, src);
} else
deep_copy(mirror, src);
return mirror;
}
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC)
__builtin_unreachable();
#endif
}
template <class Space, class T, class... P>

View File

@ -590,96 +590,81 @@ struct MirrorDynamicViewType {
} // namespace Impl
namespace Impl {
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(
const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
!alloc_prop_input::has_label,
"The view constructor arguments passed to Kokkos::create_mirror "
"must not include a label!");
static_assert(
!alloc_prop_input::has_pointer,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not include a pointer!");
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
// create a mirror
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
auto ret = typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(
prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
using MemorySpace = typename alloc_prop_input::memory_space;
ret.resize_serial(src.extent(0));
auto ret = typename Kokkos::Impl::MirrorDynamicViewType<
MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(),
src.chunk_max() * src.chunk_size());
return ret;
ret.resize_serial(src.extent(0));
return ret;
} else {
auto ret = typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(
prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
ret.resize_serial(src.extent(0));
return ret;
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(
const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
!alloc_prop_input::has_label,
"The view constructor arguments passed to Kokkos::create_mirror "
"must not include a label!");
static_assert(
!alloc_prop_input::has_pointer,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not include a pointer!");
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
using MemorySpace = typename alloc_prop_input::memory_space;
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
auto ret = typename Kokkos::Impl::MirrorDynamicViewType<
MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(),
src.chunk_max() * src.chunk_size());
ret.resize_serial(src.extent(0));
return ret;
}
} // namespace Impl
// Create a mirror in host space
template <class T, class... P>
// public interface
template <class T, class... P,
typename Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror(src, Impl::ViewCtorProp<>{});
}
template <class T, class... P>
// public interface that accepts a without initializing flag
template <class T, class... P,
typename Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
Kokkos::Impl::WithoutInitializing_t wi,
const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror(src, Kokkos::view_alloc(wi));
}
// Create a mirror in a new space
template <class Space, class T, class... P>
// public interface that accepts a space
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror(
src, Kokkos::view_alloc(typename Space::memory_space{}));
}
template <class Space, class T, class... P>
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
typename Kokkos::Impl::MirrorDynamicViewType<Space, T, P...>::view_type
create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::Experimental::DynamicView<T, P...>& src) {
@ -687,7 +672,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
}
template <class T, class... P, class... ViewCtorArgs>
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs,
typename Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::Experimental::DynamicView<T, P...>& src) {
@ -696,76 +685,56 @@ inline auto create_mirror(
namespace Impl {
// create a mirror view
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
(std::is_same<
typename Kokkos::Experimental::DynamicView<T, P...>::memory_space,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::Experimental::DynamicView<T, P...>::data_type,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::data_type>::value),
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
inline auto create_mirror_view(
const Kokkos::Experimental::DynamicView<T, P...>& src,
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
if constexpr (std::is_same<typename Kokkos::Experimental::DynamicView<
T, P...>::memory_space,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename Kokkos::Experimental::DynamicView<
T, P...>::data_type,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::data_type>::value) {
return
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
} else {
if constexpr (Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<
ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
return typename Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
!(std::is_same<
typename Kokkos::Experimental::DynamicView<T, P...>::memory_space,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::Experimental::DynamicView<T, P...>::data_type,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::data_type>::value),
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::create_mirror(arg_prop, src);
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<!Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
}
} // namespace Impl
// Create a mirror view in host space
// public interface
template <class T, class... P>
inline auto create_mirror_view(
const typename Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{});
}
// public interface that accepts a without initializing flag
template <class T, class... P>
inline auto create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi,
@ -773,15 +742,18 @@ inline auto create_mirror_view(
return Impl::create_mirror_view(src, Kokkos::view_alloc(wi));
}
// Create a mirror in a new space
template <class Space, class T, class... P>
// public interface that accepts a space
template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline auto create_mirror_view(
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror_view(src,
view_alloc(typename Space::memory_space{}));
}
template <class Space, class T, class... P>
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline auto create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::Experimental::DynamicView<T, P...>& src) {
@ -789,6 +761,8 @@ inline auto create_mirror_view(
src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
}
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror_view(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
@ -985,80 +959,57 @@ struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>,
} // namespace Impl
template <class... ViewCtorArgs, class T, class... P>
// create a mirror view and deep copy it
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class... ViewCtorArgs, class T, class... P,
class Enable = std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>&,
const Kokkos::Experimental::DynamicView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::Experimental::DynamicView<T, P...>& src) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
// same behavior as deep_copy(src, src)
if (!alloc_prop_input::has_execution_space)
fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src view");
return src;
Impl::check_view_ctor_args_create_mirror_view_and_copy<ViewCtorArgs...>();
if constexpr (Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
// same behavior as deep_copy(src, src)
if constexpr (!alloc_prop_input::has_execution_space)
fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src "
"view");
return src;
} else {
using Space = typename alloc_prop_input::memory_space;
using Mirror =
typename Impl::MirrorDynamicViewType<Space, T, P...>::view_type;
auto arg_prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string{}, WithoutInitializing,
typename Space::execution_space{});
std::string& label = Impl::get_property<Impl::LabelTag>(arg_prop_copy);
if (label.empty()) label = src.label();
auto mirror = typename Mirror::non_const_type(
arg_prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
mirror.resize_serial(src.extent(0));
if constexpr (alloc_prop_input::has_execution_space) {
deep_copy(Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop_copy),
mirror, src);
} else
deep_copy(mirror, src);
return mirror;
}
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC)
__builtin_unreachable();
#endif
}
template <class... ViewCtorArgs, class T, class... P>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::Experimental::DynamicView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
using Space = typename alloc_prop_input::memory_space;
using Mirror =
typename Impl::MirrorDynamicViewType<Space, T, P...>::view_type;
auto arg_prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string{}, WithoutInitializing,
typename Space::execution_space{});
std::string& label = Impl::get_property<Impl::LabelTag>(arg_prop_copy);
if (label.empty()) label = src.label();
auto mirror = typename Mirror::non_const_type(
arg_prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
mirror.resize_serial(src.extent(0));
if constexpr (alloc_prop_input::has_execution_space) {
deep_copy(Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop_copy),
mirror, src);
} else
deep_copy(mirror, src);
return mirror;
}
template <class Space, class T, class... P>
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
auto create_mirror_view_and_copy(
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src,
std::string const& name = "") {

View File

@ -471,62 +471,31 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) &&
is_default_map && is_layout_left && (traits::rank_dynamic == 0)),
is_default_map &&
(is_layout_left || is_layout_right || is_layout_stride)),
reference_type>
operator()(const I0& i0, const I1& i1) const {
KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1))
const size_t j0 = i0 - m_begins[0];
const size_t j1 = i1 - m_begins[1];
return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_dim.N0 * j1];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) &&
is_default_map && is_layout_left && (traits::rank_dynamic != 0)),
reference_type>
operator()(const I0& i0, const I1& i1) const {
KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1))
const size_t j0 = i0 - m_begins[0];
const size_t j1 = i1 - m_begins[1];
return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_stride * j1];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) &&
is_default_map && is_layout_right && (traits::rank_dynamic == 0)),
reference_type>
operator()(const I0& i0, const I1& i1) const {
KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1))
const size_t j0 = i0 - m_begins[0];
const size_t j1 = i1 - m_begins[1];
return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_dim.N1 * j0];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::are_integral<I0, I1>::value && (2 == Rank) &&
is_default_map && is_layout_right && (traits::rank_dynamic != 0)),
reference_type>
operator()(const I0& i0, const I1& i1) const {
KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1))
const size_t j0 = i0 - m_begins[0];
const size_t j1 = i1 - m_begins[1];
return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_stride * j0];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION
std::enable_if_t<(Kokkos::Impl::are_integral<I0, I1>::value &&
(2 == Rank) && is_default_map && is_layout_stride),
reference_type>
operator()(const I0& i0, const I1& i1) const {
KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1))
const size_t j0 = i0 - m_begins[0];
const size_t j1 = i1 - m_begins[1];
return m_map.m_impl_handle[j0 * m_map.m_impl_offset.m_stride.S0 +
j1 * m_map.m_impl_offset.m_stride.S1];
if constexpr (is_layout_left) {
if constexpr (traits::rank_dynamic == 0)
return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_dim.N0 * j1];
else
return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_stride * j1];
} else if constexpr (is_layout_right) {
if constexpr (traits::rank_dynamic == 0)
return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_dim.N1 * j0];
else
return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_stride * j0];
} else {
static_assert(is_layout_stride);
return m_map.m_impl_handle[j0 * m_map.m_impl_offset.m_stride.S0 +
j1 * m_map.m_impl_offset.m_stride.S1];
}
#if defined(KOKKOS_COMPILER_INTEL)
__builtin_unreachable();
#endif
}
//------------------------------
@ -1841,71 +1810,73 @@ struct MirrorOffsetType {
} // namespace Impl
namespace Impl {
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space,
typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror>
create_mirror(const Kokkos::Experimental::OffsetView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror(
Kokkos::create_mirror(arg_prop, src.view()), src.begins());
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
// create a mirror
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(const Kokkos::Experimental::OffsetView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
using Space = typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space;
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
static_assert(
!alloc_prop_input::has_label,
"The view constructor arguments passed to Kokkos::create_mirror "
"must not include a label!");
static_assert(
!alloc_prop_input::has_pointer,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not include a pointer!");
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
using Space = typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space;
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
return typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type(
prop_copy, src.layout(),
{src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
src.begin(5), src.begin(6), src.begin(7)});
return typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type(
prop_copy, src.layout(),
{src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4),
src.begin(5), src.begin(6), src.begin(7)});
} else {
return typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror(
Kokkos::create_mirror(arg_prop, src.view()), src.begins());
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
} // namespace Impl
// Create a mirror in host space
template <class T, class... P>
// public interface
template <class T, class... P,
typename = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
const Kokkos::Experimental::OffsetView<T, P...>& src) {
return Impl::create_mirror(src, Impl::ViewCtorProp<>{});
}
template <class T, class... P>
// public interface that accepts a without initializing flag
template <class T, class... P,
typename = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
Kokkos::Impl::WithoutInitializing_t wi,
const Kokkos::Experimental::OffsetView<T, P...>& src) {
return Impl::create_mirror(src, Kokkos::view_alloc(wi));
}
// Create a mirror in a new space
// public interface that accepts a space
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
typename Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
const Space&, const Kokkos::Experimental::OffsetView<T, P...>& src) {
return Impl::create_mirror(
src, Kokkos::view_alloc(typename Space::memory_space{}));
}
template <class Space, class T, class... P>
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
typename Kokkos::Impl::MirrorOffsetType<Space, T, P...>::view_type
create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::Experimental::OffsetView<T, P...>& src) {
@ -1913,7 +1884,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
src, Kokkos::view_alloc(typename Space::memory_space{}, wi));
}
template <class T, class... P, class... ViewCtorArgs>
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs,
typename = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::Experimental::OffsetView<T, P...>& src) {
@ -1921,76 +1896,56 @@ inline auto create_mirror(
}
namespace Impl {
// create a mirror view
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
(std::is_same<
typename Kokkos::Experimental::OffsetView<T, P...>::memory_space,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::Experimental::OffsetView<T, P...>::data_type,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::data_type>::value),
typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::Experimental::OffsetView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
inline auto create_mirror_view(
const Kokkos::Experimental::OffsetView<T, P...>& src,
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
if constexpr (std::is_same<typename Kokkos::Experimental::OffsetView<
T, P...>::memory_space,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename Kokkos::Experimental::OffsetView<
T, P...>::data_type,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::data_type>::value) {
return
typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
} else {
if constexpr (Impl::MirrorOffsetViewType<typename Impl::ViewCtorProp<
ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
return typename Impl::MirrorOffsetViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
!(std::is_same<
typename Kokkos::Experimental::OffsetView<T, P...>::memory_space,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::Experimental::OffsetView<T, P...>::data_type,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::data_type>::value),
typename Kokkos::Experimental::OffsetView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::Experimental::OffsetView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::create_mirror(arg_prop, src);
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<Impl::MirrorOffsetViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorOffsetViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::Experimental::OffsetView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<!Impl::MirrorOffsetViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorOffsetViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::Experimental::OffsetView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
}
} // namespace Impl
// Create a mirror view in host space
// public interface
template <class T, class... P>
inline auto create_mirror_view(
const typename Kokkos::Experimental::OffsetView<T, P...>& src) {
return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{});
}
// public interface that accepts a without initializing flag
template <class T, class... P>
inline auto create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi,
@ -1998,7 +1953,7 @@ inline auto create_mirror_view(
return Impl::create_mirror_view(src, Kokkos::view_alloc(wi));
}
// Create a mirror view in a new space
// public interface that accepts a space
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline auto create_mirror_view(
@ -2007,7 +1962,9 @@ inline auto create_mirror_view(
src, Kokkos::view_alloc(typename Space::memory_space{}));
}
template <class Space, class T, class... P>
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline auto create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::Experimental::OffsetView<T, P...>& src) {
@ -2015,6 +1972,8 @@ inline auto create_mirror_view(
src, Kokkos::view_alloc(typename Space::memory_space{}, wi));
}
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror_view(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
@ -2022,7 +1981,9 @@ inline auto create_mirror_view(
return Impl::create_mirror_view(src, arg_prop);
}
// Create a mirror view and deep_copy in a new space
// create a mirror view and deep copy it
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class... ViewCtorArgs, class T, class... P>
typename Kokkos::Impl::MirrorOffsetViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,

View File

@ -805,56 +805,94 @@ class UnorderedMap {
return *this;
}
// Re-allocate the views of the calling UnorderedMap according to src
// capacity, and deep copy the src data.
template <typename SKey, typename SValue, typename SDevice>
std::enable_if_t<std::is_same<std::remove_const_t<SKey>, key_type>::value &&
std::is_same<std::remove_const_t<SValue>, value_type>::value>
create_copy_view(
UnorderedMap<SKey, SValue, SDevice, Hasher, EqualTo> const &src) {
if (m_hash_lists.data() != src.m_hash_lists.data()) {
insertable_map_type tmp;
allocate_view(src);
deep_copy_view(src);
}
}
tmp.m_bounded_insert = src.m_bounded_insert;
tmp.m_hasher = src.m_hasher;
tmp.m_equal_to = src.m_equal_to;
tmp.m_size() = src.m_size();
tmp.m_available_indexes = bitset_type(src.capacity());
tmp.m_hash_lists = size_type_view(
view_alloc(WithoutInitializing, "UnorderedMap hash list"),
src.m_hash_lists.extent(0));
tmp.m_next_index = size_type_view(
view_alloc(WithoutInitializing, "UnorderedMap next index"),
src.m_next_index.extent(0));
tmp.m_keys =
key_type_view(view_alloc(WithoutInitializing, "UnorderedMap keys"),
src.m_keys.extent(0));
tmp.m_values = value_type_view(
view_alloc(WithoutInitializing, "UnorderedMap values"),
src.m_values.extent(0));
tmp.m_scalars = scalars_view("UnorderedMap scalars");
// Allocate views of the calling UnorderedMap with the same capacity as the
// src.
template <typename SKey, typename SValue, typename SDevice>
std::enable_if_t<std::is_same<std::remove_const_t<SKey>, key_type>::value &&
std::is_same<std::remove_const_t<SValue>, value_type>::value>
allocate_view(
UnorderedMap<SKey, SValue, SDevice, Hasher, EqualTo> const &src) {
insertable_map_type tmp;
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
tmp.m_bounded_insert = src.m_bounded_insert;
tmp.m_hasher = src.m_hasher;
tmp.m_equal_to = src.m_equal_to;
tmp.m_size() = src.m_size();
tmp.m_available_indexes = bitset_type(src.capacity());
tmp.m_hash_lists = size_type_view(
view_alloc(WithoutInitializing, "UnorderedMap hash list"),
src.m_hash_lists.extent(0));
tmp.m_next_index = size_type_view(
view_alloc(WithoutInitializing, "UnorderedMap next index"),
src.m_next_index.extent(0));
tmp.m_keys =
key_type_view(view_alloc(WithoutInitializing, "UnorderedMap keys"),
src.m_keys.extent(0));
tmp.m_values =
value_type_view(view_alloc(WithoutInitializing, "UnorderedMap values"),
src.m_values.extent(0));
tmp.m_scalars = scalars_view("UnorderedMap scalars");
*this = tmp;
}
// Deep copy view data from src. This requires that the src capacity is
// identical to the capacity of the calling UnorderedMap.
template <typename SKey, typename SValue, typename SDevice>
std::enable_if_t<std::is_same<std::remove_const_t<SKey>, key_type>::value &&
std::is_same<std::remove_const_t<SValue>, value_type>::value>
deep_copy_view(
UnorderedMap<SKey, SValue, SDevice, Hasher, EqualTo> const &src) {
#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
// To deep copy UnorderedMap, capacity must be identical
KOKKOS_EXPECTS(capacity() == src.capacity());
#else
if (capacity() != src.capacity()) {
allocate_view(src);
#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
Kokkos::Impl::log_warning(
"Warning: deep_copy_view() allocating views is deprecated. Must call "
"with UnorderedMaps of identical capacity, or use "
"create_copy_view().\n");
#endif
}
#endif
if (m_hash_lists.data() != src.m_hash_lists.data()) {
Kokkos::deep_copy(m_available_indexes, src.m_available_indexes);
using raw_deep_copy =
Kokkos::Impl::DeepCopy<typename device_type::memory_space,
typename SDevice::memory_space>;
raw_deep_copy(tmp.m_hash_lists.data(), src.m_hash_lists.data(),
raw_deep_copy(m_hash_lists.data(), src.m_hash_lists.data(),
sizeof(size_type) * src.m_hash_lists.extent(0));
raw_deep_copy(tmp.m_next_index.data(), src.m_next_index.data(),
raw_deep_copy(m_next_index.data(), src.m_next_index.data(),
sizeof(size_type) * src.m_next_index.extent(0));
raw_deep_copy(tmp.m_keys.data(), src.m_keys.data(),
raw_deep_copy(m_keys.data(), src.m_keys.data(),
sizeof(key_type) * src.m_keys.extent(0));
if (!is_set) {
raw_deep_copy(tmp.m_values.data(), src.m_values.data(),
raw_deep_copy(m_values.data(), src.m_values.data(),
sizeof(impl_value_type) * src.m_values.extent(0));
}
raw_deep_copy(tmp.m_scalars.data(), src.m_scalars.data(),
raw_deep_copy(m_scalars.data(), src.m_scalars.data(),
sizeof(int) * num_scalars);
Kokkos::fence(
"Kokkos::UnorderedMap::create_copy_view: fence after copy to tmp");
*this = tmp;
"Kokkos::UnorderedMap::deep_copy_view: fence after copy to dst.");
}
}
@ -932,13 +970,25 @@ class UnorderedMap {
friend struct Impl::UnorderedMapPrint;
};
// Specialization of deep_copy for two UnorderedMap objects.
// Specialization of deep_copy() for two UnorderedMap objects.
template <typename DKey, typename DT, typename DDevice, typename SKey,
typename ST, typename SDevice, typename Hasher, typename EqualTo>
inline void deep_copy(
UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> &dst,
const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> &src) {
dst.create_copy_view(src);
dst.deep_copy_view(src);
}
// Specialization of create_mirror() for an UnorderedMap object.
template <typename Key, typename ValueType, typename Device, typename Hasher,
typename EqualTo>
typename UnorderedMap<Key, ValueType, Device, Hasher, EqualTo>::HostMirror
create_mirror(
const UnorderedMap<Key, ValueType, Device, Hasher, EqualTo> &src) {
typename UnorderedMap<Key, ValueType, Device, Hasher, EqualTo>::HostMirror
dst;
dst.allocate_view(src);
return dst;
}
} // namespace Kokkos

View File

@ -55,8 +55,8 @@ struct test_dualview_alloc {
bool result = false;
test_dualview_alloc(unsigned int size) {
result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(
size, 3);
result =
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(size, 3);
}
};
@ -154,7 +154,7 @@ struct test_dualview_combinations {
}
test_dualview_combinations(unsigned int size, bool with_init) {
result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(
result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(
size, 3, with_init);
}
};
@ -253,21 +253,18 @@ struct test_dual_view_deep_copy {
} // end run_me
test_dual_view_deep_copy() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(10, 5,
true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(10, 5,
false);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(10, 5, true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(10, 5,
false);
// Test zero length but allocated (a.d_view.data!=nullptr but
// a.d_view.span()==0)
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(0, 5, true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(0, 5,
false);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(0, 5, true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(0, 5, false);
// Test default constructed view
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(-1, 5,
true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(-1, 5,
false);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(-1, 5, true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>(-1, 5,
false);
}
};
@ -282,15 +279,20 @@ struct test_dualview_resize {
const unsigned int m = 5;
const unsigned int factor = 2;
ViewType a("A", n, m);
ViewType a;
if constexpr (Initialize)
a = ViewType("A", n, m);
else
a = ViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "A"), n, m);
Kokkos::deep_copy(a.d_view, 1);
/* Covers case "Resize on Device" */
a.modify_device();
if (Initialize)
Kokkos::resize(Kokkos::WithoutInitializing, a, factor * n, factor * m);
else
if constexpr (Initialize)
Kokkos::resize(a, factor * n, factor * m);
else
Kokkos::resize(Kokkos::WithoutInitializing, a, factor * n, factor * m);
ASSERT_EQ(a.extent(0), n * factor);
ASSERT_EQ(a.extent(1), m * factor);
@ -298,33 +300,38 @@ struct test_dualview_resize {
a.sync_host();
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
using t_dev_exec_space =
typename ViewType::t_dev::memory_space::execution_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
Kokkos::View<int, typename ViewType::t_dev::memory_space> errors_d(
"errors");
Kokkos::parallel_for(
Kokkos::MDRangePolicy<t_dev_exec_space, Kokkos::Rank<2>>(
{0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}),
KOKKOS_LAMBDA(int i, int j) {
if (a.d_view(i, j) != 1) Kokkos::atomic_inc(errors_d.data());
});
int errors_d_scalar;
Kokkos::deep_copy(errors_d_scalar, errors_d);
// Check host view is synced as expected
scalar_type a_h_sum = 0;
int errors_h_scalar = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
if (a.h_view(i, j) != 1) ++errors_h_scalar;
}
// Check
ASSERT_EQ(a_h_sum, a_d_sum);
ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1)));
ASSERT_EQ(errors_d_scalar, 0);
ASSERT_EQ(errors_h_scalar, 0);
/* Covers case "Resize on Host" */
a.modify_host();
if (Initialize)
Kokkos::resize(Kokkos::WithoutInitializing, a, n / factor, m / factor);
else
if constexpr (Initialize)
Kokkos::resize(a, n / factor, m / factor);
else
Kokkos::resize(Kokkos::WithoutInitializing, a, n / factor, m / factor);
ASSERT_EQ(a.extent(0), n / factor);
ASSERT_EQ(a.extent(1), m / factor);
@ -332,30 +339,33 @@ struct test_dualview_resize {
a.sync_device(Kokkos::DefaultExecutionSpace{});
// Check device view is initialized as expected
a_d_sum = 0;
Kokkos::deep_copy(errors_d, 0);
// Execute on the execution_space associated with t_dev's memory space
using t_dev_exec_space =
typename ViewType::t_dev::memory_space::execution_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
Kokkos::parallel_for(
Kokkos::MDRangePolicy<t_dev_exec_space, Kokkos::Rank<2>>(
{0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}),
KOKKOS_LAMBDA(int i, int j) {
if (a.d_view(i, j) != 1) Kokkos::atomic_inc(errors_d.data());
});
Kokkos::deep_copy(errors_d_scalar, errors_d);
// Check host view is synced as expected
a_h_sum = 0;
errors_h_scalar = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
if (a.h_view(i, j) != 1) ++errors_h_scalar;
}
// Check
ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1)));
ASSERT_EQ(a_h_sum, a_d_sum);
ASSERT_EQ(errors_d_scalar, 0);
ASSERT_EQ(errors_h_scalar, 0);
} // end run_me
test_dualview_resize() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >();
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>();
}
};
@ -369,40 +379,51 @@ struct test_dualview_realloc {
const unsigned int n = 10;
const unsigned int m = 5;
ViewType a("A", n, m);
if (Initialize)
Kokkos::realloc(Kokkos::WithoutInitializing, a, n, m);
else
ViewType a;
if constexpr (Initialize) {
a = ViewType("A", n, m);
Kokkos::realloc(a, n, m);
} else {
a = ViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "A"), n, m);
Kokkos::realloc(Kokkos::WithoutInitializing, a, n, m);
}
ASSERT_EQ(a.extent(0), n);
ASSERT_EQ(a.extent(1), m);
Kokkos::deep_copy(a.d_view, 1);
a.modify_device();
a.sync_host();
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
using t_dev_exec_space =
typename ViewType::t_dev::memory_space::execution_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
Kokkos::View<int, typename ViewType::t_dev::memory_space> errors_d(
"errors");
Kokkos::parallel_for(
Kokkos::MDRangePolicy<t_dev_exec_space, Kokkos::Rank<2>>(
{0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}),
KOKKOS_LAMBDA(int i, int j) {
if (a.d_view(i, j) != 1) Kokkos::atomic_inc(errors_d.data());
});
int errors_d_scalar;
Kokkos::deep_copy(errors_d_scalar, errors_d);
// Check host view is synced as expected
scalar_type a_h_sum = 0;
int errors_h_scalar = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
if (a.h_view(i, j) != 1) ++errors_h_scalar;
}
// Check
ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1)));
ASSERT_EQ(a_h_sum, a_d_sum);
ASSERT_EQ(errors_d_scalar, 0);
ASSERT_EQ(errors_h_scalar, 0);
} // end run_me
test_dualview_realloc() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >();
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device>>();
}
};
@ -463,12 +484,23 @@ TEST(TEST_CATEGORY, dualview_deep_copy) {
test_dualview_deep_copy<double, TEST_EXECSPACE>();
}
struct NoDefaultConstructor {
NoDefaultConstructor(int i_) : i(i_) {}
KOKKOS_FUNCTION operator int() const { return i; }
int i;
};
TEST(TEST_CATEGORY, dualview_realloc) {
test_dualview_realloc<int, TEST_EXECSPACE>();
Impl::test_dualview_realloc<NoDefaultConstructor, TEST_EXECSPACE,
/* Initialize */ false>();
}
TEST(TEST_CATEGORY, dualview_resize) {
test_dualview_resize<int, TEST_EXECSPACE>();
Impl::test_dualview_resize<NoDefaultConstructor, TEST_EXECSPACE,
/* Initialize */ false>();
}
namespace {

View File

@ -68,7 +68,7 @@ struct TestInsert {
} while (rehash_on_fail && failed_count > 0u);
// Trigger the m_size mutable bug.
typename map_type::HostMirror map_h;
auto map_h = create_mirror(map);
execution_space().fence();
Kokkos::deep_copy(map_h, map);
execution_space().fence();
@ -367,7 +367,7 @@ void test_deep_copy(uint32_t num_nodes) {
}
}
host_map_type hmap;
auto hmap = create_mirror(map);
Kokkos::deep_copy(hmap, map);
ASSERT_EQ(map.size(), hmap.size());
@ -380,6 +380,7 @@ void test_deep_copy(uint32_t num_nodes) {
}
map_type mmap;
mmap.allocate_view(hmap);
Kokkos::deep_copy(mmap, hmap);
const_map_type cmap = mmap;
@ -424,7 +425,7 @@ TEST(TEST_CATEGORY, UnorderedMap_valid_empty) {
Map n{};
n = Map{m.capacity()};
n.rehash(m.capacity());
Kokkos::deep_copy(n, m);
n.create_copy_view(m);
ASSERT_TRUE(m.is_allocated());
ASSERT_TRUE(n.is_allocated());
}

View File

@ -21,6 +21,8 @@
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <Kokkos_Macros.hpp>
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
#include <Kokkos_Vector.hpp>
namespace Test {
@ -231,7 +233,7 @@ void test_vector_allocate(unsigned int size) {
TEST(TEST_CATEGORY, vector_combination) {
test_vector_allocate<int, TEST_EXECSPACE>(10);
test_vector_combinations<int, TEST_EXECSPACE>(10);
test_vector_combinations<int, TEST_EXECSPACE>(3057);
test_vector_combinations<long long int, TEST_EXECSPACE>(3057);
}
TEST(TEST_CATEGORY, vector_insert) {

View File

@ -37,6 +37,17 @@
#endif
///@}
/// Some tests are skipped for unified memory space
#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE \
if constexpr (std::is_same_v<typename TEST_EXECSPACE::memory_space, \
Kokkos::CudaSpace>) \
GTEST_SKIP() << "skipping since unified memory requires additional " \
"fences";
#else
#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE
#endif
TEST(TEST_CATEGORY, resize_realloc_no_init_dualview) {
using namespace Kokkos::Test::Tools;
listen_tool_events(Config::DisableAll(), Config::EnableKernels());
@ -657,6 +668,7 @@ TEST(TEST_CATEGORY, create_mirror_no_init_dynamicview) {
TEST(TEST_CATEGORY, create_mirror_view_and_copy_dynamicview) {
GTEST_SKIP_IF_CUDAUVM_MEMORY_SPACE
GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE
using namespace Kokkos::Test::Tools;
listen_tool_events(Config::DisableAll(), Config::EnableKernels(),

View File

@ -390,7 +390,7 @@ static void Test_Atomic(benchmark::State& state) {
static constexpr int LOOP = 100'000;
BENCHMARK(Test_Atomic<int>)->Arg(LOOP)->Iterations(10);
BENCHMARK(Test_Atomic<int>)->Arg(30'000)->Iterations(10);
BENCHMARK(Test_Atomic<long int>)->Arg(LOOP)->Iterations(10);
BENCHMARK(Test_Atomic<long long int>)->Arg(LOOP)->Iterations(10);
BENCHMARK(Test_Atomic<unsigned int>)->Arg(LOOP)->Iterations(10);
@ -398,4 +398,3 @@ BENCHMARK(Test_Atomic<unsigned long int>)->Arg(LOOP)->Iterations(10);
BENCHMARK(Test_Atomic<unsigned long long int>)->Arg(LOOP)->Iterations(10);
BENCHMARK(Test_Atomic<float>)->Arg(LOOP)->Iterations(10);
BENCHMARK(Test_Atomic<double>)->Arg(LOOP)->Iterations(10);
BENCHMARK(Test_Atomic<int>)->Arg(LOOP)->Iterations(10);

View File

@ -183,7 +183,8 @@ double atomic_contentious_max_replacement(benchmark::State& state,
Kokkos::parallel_reduce(
con_length,
KOKKOS_LAMBDA(const int i, T& inner) {
inner = Kokkos::atomic_max_fetch(&(input(0)), inner + 1);
inner = Kokkos::atomic_max_fetch(&(input(0)),
Kokkos::min(inner, max - 1) + 1);
if (i == con_length - 1) {
Kokkos::atomic_max_fetch(&(input(0)), max);
inner = max;
@ -223,7 +224,8 @@ double atomic_contentious_min_replacement(benchmark::State& state,
Kokkos::parallel_reduce(
con_length,
KOKKOS_LAMBDA(const int i, T& inner) {
inner = Kokkos::atomic_min_fetch(&(input(0)), inner - 1);
inner = Kokkos::atomic_min_fetch(&(input(0)),
Kokkos::max(inner, min + 1) - 1);
if (i == con_length - 1) {
Kokkos::atomic_min_fetch(&(input(0)), min);
inner = min;
@ -246,7 +248,7 @@ static void Atomic_ContentiousMinReplacements(benchmark::State& state) {
auto inp = prepare_input(1, std::numeric_limits<T>::max());
for (auto _ : state) {
const auto time = atomic_contentious_max_replacement(state, inp, length);
const auto time = atomic_contentious_min_replacement(state, inp, length);
state.SetIterationTime(time);
}

View File

@ -166,8 +166,17 @@ class Cuda {
Cuda();
Cuda(cudaStream_t stream,
Impl::ManageStream manage_stream = Impl::ManageStream::no);
explicit Cuda(cudaStream_t stream) : Cuda(stream, Impl::ManageStream::no) {}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <typename T = void>
KOKKOS_DEPRECATED_WITH_COMMENT(
"Cuda execution space should be constructed explicitly.")
Cuda(cudaStream_t stream)
: Cuda(stream) {}
#endif
Cuda(cudaStream_t stream, Impl::ManageStream manage_stream);
KOKKOS_DEPRECATED Cuda(cudaStream_t stream, bool manage_stream);
@ -186,7 +195,7 @@ class Cuda {
///
/// This matches the __CUDA_ARCH__ specification.
KOKKOS_DEPRECATED static size_type device_arch() {
const cudaDeviceProp& cudaProp = Cuda().cuda_device_prop();
const cudaDeviceProp cudaProp = Cuda().cuda_device_prop();
return cudaProp.major * 100 + cudaProp.minor;
}

View File

@ -31,7 +31,6 @@
#include <algorithm>
#include <atomic>
//#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp>
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Tools.hpp>
@ -59,12 +58,6 @@ const std::unique_ptr<Kokkos::Cuda> &Kokkos::Impl::cuda_get_deep_copy_space(
namespace Kokkos {
namespace Impl {
namespace {
static std::atomic<int> num_uvm_allocations(0);
} // namespace
void DeepCopyCuda(void *dst, const void *src, size_t n) {
KOKKOS_IMPL_CUDA_SAFE_CALL((CudaInternal::singleton().cuda_memcpy_wrapper(
dst, src, n, cudaMemcpyDefault)));
@ -184,6 +177,29 @@ void *impl_allocate_common(const int device_id,
cudaError_t error_code = cudaSuccess;
#ifndef CUDART_VERSION
#error CUDART_VERSION undefined!
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
// This is intended for Grace-Hopper (and future unified memory architectures)
// The idea is to use host allocator and then advise to keep it in HBM on the
// device, but that requires CUDA 12.2
static_assert(CUDART_VERSION >= 12020,
"CUDA runtime version >=12.2 required when "
"Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY is set. "
"Please update your CUDA runtime version or "
"reconfigure with "
"-D Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF");
if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr
error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal);
if (error_code == cudaSuccess) {
// One would think cudaMemLocation{device_id,
// cudaMemLocationTypeDevice} would work but it doesn't. I.e. the order of
// members doesn't seem to be defined.
cudaMemLocation loc;
loc.id = device_id;
loc.type = cudaMemLocationTypeDevice;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
}
}
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
if (arg_alloc_size >= memory_threshold_g) {
error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream);
@ -196,18 +212,19 @@ void *impl_allocate_common(const int device_id,
"Kokkos::Cuda: backend fence after async malloc");
}
}
} else
} else {
error_code = cudaMalloc(&ptr, arg_alloc_size);
}
#else
error_code = cudaMalloc(&ptr, arg_alloc_size);
#endif
{ error_code = cudaMalloc(&ptr, arg_alloc_size); }
if (error_code != cudaSuccess) { // TODO tag as unlikely branch
// This is the only way to clear the last error, which
// we should do here since we're turning it into an
// exception here
cudaGetLastError();
throw Experimental::CudaRawMemoryAllocationFailure(
arg_alloc_size, error_code,
Experimental::RawMemoryAllocationFailure::AllocationMechanism::
CudaMalloc);
Kokkos::Impl::throw_bad_alloc(arg_handle.name, arg_alloc_size, arg_label);
}
if (Kokkos::Profiling::profileLibraryLoaded()) {
@ -252,8 +269,6 @@ void *CudaUVMSpace::impl_allocate(
Cuda::impl_static_fence(
"Kokkos::CudaUVMSpace::impl_allocate: Pre UVM Allocation");
if (arg_alloc_size > 0) {
Kokkos::Impl::num_uvm_allocations++;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
cudaError_t error_code =
cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal);
@ -263,10 +278,7 @@ void *CudaUVMSpace::impl_allocate(
// we should do here since we're turning it into an
// exception here
cudaGetLastError();
throw Experimental::CudaRawMemoryAllocationFailure(
arg_alloc_size, error_code,
Experimental::RawMemoryAllocationFailure::AllocationMechanism::
CudaMallocManaged);
Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label);
}
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
@ -307,10 +319,7 @@ void *CudaHostPinnedSpace::impl_allocate(
// we should do here since we're turning it into an
// exception here
cudaGetLastError();
throw Experimental::CudaRawMemoryAllocationFailure(
arg_alloc_size, error_code,
Experimental::RawMemoryAllocationFailure::AllocationMechanism::
CudaHostAlloc);
Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label);
}
if (Kokkos::Profiling::profileLibraryLoaded()) {
const size_t reported_size =
@ -341,27 +350,27 @@ void CudaSpace::impl_deallocate(
Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr,
reported_size);
}
try {
#ifndef CUDART_VERSION
#error CUDART_VERSION undefined!
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
if (arg_alloc_size >= memory_threshold_g) {
Impl::cuda_device_synchronize(
"Kokkos::Cuda: backend fence before async free");
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeAsync(arg_alloc_ptr, m_stream));
Impl::cuda_device_synchronize(
"Kokkos::Cuda: backend fence after async free");
} else {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
}
#else
if (arg_alloc_size >= memory_threshold_g) {
Impl::cuda_device_synchronize(
"Kokkos::Cuda: backend fence before async free");
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeAsync(arg_alloc_ptr, m_stream));
Impl::cuda_device_synchronize(
"Kokkos::Cuda: backend fence after async free");
} else {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
#endif
} catch (...) {
}
#else
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
#endif
}
void CudaUVMSpace::deallocate(void *const arg_alloc_ptr,
const size_t arg_alloc_size) const {
@ -387,13 +396,9 @@ void CudaUVMSpace::impl_deallocate(
Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr,
reported_size);
}
try {
if (arg_alloc_ptr != nullptr) {
Kokkos::Impl::num_uvm_allocations--;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
}
} catch (...) {
if (arg_alloc_ptr != nullptr) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
}
Cuda::impl_static_fence(
"Kokkos::CudaUVMSpace::impl_deallocate: Post UVM Deallocation");
@ -420,11 +425,8 @@ void CudaHostPinnedSpace::impl_deallocate(
Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr,
reported_size);
}
try {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(arg_alloc_ptr));
} catch (...) {
}
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(arg_alloc_ptr));
}
} // namespace Kokkos
@ -463,8 +465,12 @@ void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes,
#include <impl/Kokkos_SharedAlloc_timpl.hpp>
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::CudaSpace);
#else
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::CudaSpace);
#endif
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
Kokkos::CudaUVMSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(

View File

@ -88,6 +88,19 @@ class CudaSpace {
void* allocate(const char* arg_label, const size_t arg_alloc_size,
const size_t arg_logical_size = 0) const;
#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
template <typename ExecutionSpace>
void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const {
return allocate(arg_alloc_size);
}
template <typename ExecutionSpace>
void* allocate(const ExecutionSpace&, const char* arg_label,
const size_t arg_alloc_size,
const size_t arg_logical_size = 0) const {
return allocate(arg_label, arg_alloc_size, arg_logical_size);
}
#endif
/**\brief Deallocate untracked memory in the cuda space */
void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
void deallocate(const char* arg_label, void* const arg_alloc_ptr,
@ -337,7 +350,11 @@ static_assert(
template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
enum : bool { assignable = false };
enum : bool { accessible = false };
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
enum : bool{accessible = false};
#else
enum : bool { accessible = true };
#endif
enum : bool { deepcopy = true };
};
@ -558,8 +575,12 @@ struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION(
Kokkos::CudaSpace);
#else
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaSpace);
#endif
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaUVMSpace);
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaHostPinnedSpace);

View File

@ -22,7 +22,6 @@
#include <impl/Kokkos_Error.hpp>
#include <impl/Kokkos_Profiling.hpp>
#include <iosfwd>
namespace Kokkos {
namespace Impl {
@ -69,52 +68,6 @@ inline void cuda_internal_safe_call(cudaError e, const char* name,
Kokkos::Impl::cuda_internal_safe_call(call, #call, __FILE__, __LINE__)
} // namespace Impl
namespace Experimental {
class CudaRawMemoryAllocationFailure : public RawMemoryAllocationFailure {
private:
using base_t = RawMemoryAllocationFailure;
cudaError_t m_error_code = cudaSuccess;
static FailureMode get_failure_mode(cudaError_t error_code) {
switch (error_code) {
case cudaErrorMemoryAllocation: return FailureMode::OutOfMemoryError;
case cudaErrorInvalidValue: return FailureMode::InvalidAllocationSize;
// TODO handle cudaErrorNotSupported for cudaMallocManaged
default: return FailureMode::Unknown;
}
}
public:
// using base_t::base_t;
// would trigger
//
// error: cannot determine the exception specification of the default
// constructor due to a circular dependency
//
// using NVCC 9.1 and gcc 7.4
CudaRawMemoryAllocationFailure(
size_t arg_attempted_size, size_t arg_attempted_alignment,
FailureMode arg_failure_mode = FailureMode::OutOfMemoryError,
AllocationMechanism arg_mechanism =
AllocationMechanism::StdMalloc) noexcept
: base_t(arg_attempted_size, arg_attempted_alignment, arg_failure_mode,
arg_mechanism) {}
CudaRawMemoryAllocationFailure(size_t arg_attempted_size,
cudaError_t arg_error_code,
AllocationMechanism arg_mechanism) noexcept
: base_t(arg_attempted_size, /* CudaSpace doesn't handle alignment? */ 1,
get_failure_mode(arg_error_code), arg_mechanism),
m_error_code(arg_error_code) {}
void append_additional_error_information(std::ostream& o) const override;
};
} // end namespace Experimental
} // namespace Kokkos
#endif // KOKKOS_ENABLE_CUDA

View File

@ -72,7 +72,7 @@ struct GraphImpl<Kokkos::Cuda> {
GraphNodeImpl<Kokkos::Cuda, aggregate_kernel_impl_t,
Kokkos::Experimental::TypeErasedTag>;
// Not moveable or copyable; it spends its whole life as a shared_ptr in the
// Not movable or copyable; it spends its whole life as a shared_ptr in the
// Graph object
GraphImpl() = delete;
GraphImpl(GraphImpl const&) = delete;
@ -115,12 +115,9 @@ struct GraphImpl<Kokkos::Cuda> {
template <class NodeImpl>
// requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl
// Also requires that the kernel has the graph node tag in it's policy
// Also requires that the kernel has the graph node tag in its policy
void add_node(std::shared_ptr<NodeImpl> const& arg_node_ptr) {
static_assert(
NodeImpl::kernel_type::Policy::is_graph_kernel::value,
"Something has gone horribly wrong, but it's too complicated to "
"explain here. Buy Daisy a coffee and she'll explain it to you.");
static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value);
KOKKOS_EXPECTS(bool(arg_node_ptr));
// The Kernel launch from the execute() method has been shimmed to insert
// the node into the graph

View File

@ -607,6 +607,22 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default
//----------------------------------
#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
// Check if unified memory is available
int cuda_result;
cudaDeviceGetAttribute(&cuda_result, cudaDevAttrConcurrentManagedAccess,
cuda_device_id);
if (cuda_result == 0) {
Kokkos::abort(
"Kokkos::Cuda::initialize ERROR: Unified memory is not available on "
"this device\n"
"Please recompile Kokkos with "
"-DKokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF\n");
}
#endif
//----------------------------------
cudaStream_t singleton_stream;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(cuda_device_id));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamCreate(&singleton_stream));
@ -705,6 +721,10 @@ void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const {
#else
os << "no\n";
#endif
#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
os << " KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY: ";
os << "yes\n";
#endif
os << "\nCuda Runtime Configuration:\n";
@ -737,6 +757,14 @@ namespace Impl {
int g_cuda_space_factory_initialized =
initialize_space_factory<Cuda>("150_Cuda");
int CudaInternal::m_cudaArch = -1;
cudaDeviceProp CudaInternal::m_deviceProp;
std::set<int> CudaInternal::cuda_devices = {};
std::map<int, unsigned long *> CudaInternal::constantMemHostStagingPerDevice =
{};
std::map<int, cudaEvent_t> CudaInternal::constantMemReusablePerDevice = {};
std::map<int, std::mutex> CudaInternal::constantMemMutexPerDevice = {};
} // namespace Impl
} // namespace Kokkos

View File

@ -91,10 +91,10 @@ class CudaInternal {
int m_cudaDev = -1;
// Device Properties
inline static int m_cudaArch = -1;
static int m_cudaArch;
static int concurrency();
inline static cudaDeviceProp m_deviceProp;
static cudaDeviceProp m_deviceProp;
// Scratch Spaces for Reductions
mutable std::size_t m_scratchSpaceCount;
@ -120,11 +120,10 @@ class CudaInternal {
bool was_initialized = false;
bool was_finalized = false;
inline static std::set<int> cuda_devices = {};
inline static std::map<int, unsigned long*> constantMemHostStagingPerDevice =
{};
inline static std::map<int, cudaEvent_t> constantMemReusablePerDevice = {};
inline static std::map<int, std::mutex> constantMemMutexPerDevice = {};
static std::set<int> cuda_devices;
static std::map<int, unsigned long*> constantMemHostStagingPerDevice;
static std::map<int, cudaEvent_t> constantMemReusablePerDevice;
static std::map<int, std::mutex> constantMemMutexPerDevice;
static CudaInternal& singleton();
@ -421,23 +420,6 @@ class CudaInternal {
return cudaStreamSynchronize(stream);
}
// The following are only available for cuda 11.2 and greater
#if (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
template <bool setCudaDevice = true>
cudaError_t cuda_malloc_async_wrapper(void** devPtr, size_t size,
cudaStream_t hStream = nullptr) const {
if constexpr (setCudaDevice) set_cuda_device();
return cudaMallocAsync(devPtr, size, get_input_stream(hStream));
}
template <bool setCudaDevice = true>
cudaError_t cuda_free_async_wrapper(void* devPtr,
cudaStream_t hStream = nullptr) const {
if constexpr (setCudaDevice) set_cuda_device();
return cudaFreeAsync(devPtr, get_input_stream(hStream));
}
#endif
// C++ API routines
template <typename T, bool setCudaDevice = true>
cudaError_t cuda_func_get_attributes_wrapper(cudaFuncAttributes* attr,

View File

@ -539,17 +539,9 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
m_vector_size(arg_policy.impl_vector_length()) {
auto internal_space_instance =
m_policy.space().impl_internal_space_instance();
cudaFuncAttributes attr =
CudaParallelLaunch<ParallelFor, LaunchBounds>::get_cuda_func_attributes(
internal_space_instance->m_cudaDev);
m_team_size =
m_team_size >= 0
? m_team_size
: Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>(
internal_space_instance, attr, m_functor, m_vector_size,
m_policy.team_scratch_size(0),
m_policy.thread_scratch_size(0)) /
m_vector_size;
m_team_size = m_team_size >= 0 ? m_team_size
: arg_policy.team_size_recommended(
arg_functor, ParallelForTag());
m_shmem_begin = (sizeof(double) * (m_team_size + 2));
m_shmem_size =
@ -585,13 +577,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
"Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory"));
}
if (int(m_team_size) >
int(Kokkos::Impl::cuda_get_max_block_size<FunctorType, LaunchBounds>(
internal_space_instance, attr, arg_functor,
arg_policy.impl_vector_length(),
arg_policy.team_scratch_size(0),
arg_policy.thread_scratch_size(0)) /
arg_policy.impl_vector_length())) {
if (m_team_size > arg_policy.team_size_max(arg_functor, ParallelForTag())) {
Kokkos::Impl::throw_runtime_exception(std::string(
"Kokkos::Impl::ParallelFor< Cuda > requested too large team size."));
}
@ -909,17 +895,11 @@ class ParallelReduce<CombinedFunctorReducerType,
m_vector_size(arg_policy.impl_vector_length()) {
auto internal_space_instance =
m_policy.space().impl_internal_space_instance();
cudaFuncAttributes attr = CudaParallelLaunch<ParallelReduce, LaunchBounds>::
get_cuda_func_attributes(internal_space_instance->m_cudaDev);
m_team_size =
m_team_size >= 0
? m_team_size
: Kokkos::Impl::cuda_get_opt_block_size<FunctorType, LaunchBounds>(
internal_space_instance, attr,
m_functor_reducer.get_functor(), m_vector_size,
m_policy.team_scratch_size(0),
m_policy.thread_scratch_size(0)) /
m_vector_size;
m_team_size = m_team_size >= 0 ? m_team_size
: arg_policy.team_size_recommended(
arg_functor_reducer.get_functor(),
arg_functor_reducer.get_reducer(),
ParallelReduceTag());
m_team_begin =
UseShflReduction

View File

@ -28,35 +28,20 @@ extern "C" {
/* Cuda runtime function, declared in <crt/device_runtime.h>
* Requires capability 2.x or better.
*/
extern __device__ void __assertfail(const void *message, const void *file,
unsigned int line, const void *function,
size_t charsize);
[[noreturn]] __device__ void __assertfail(const void *message, const void *file,
unsigned int line,
const void *function,
size_t charsize);
}
namespace Kokkos {
namespace Impl {
// required to workaround failures in random number generator unit tests with
// pre-volta architectures
#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK)
__device__ inline void cuda_abort(const char *const message) {
#else
[[noreturn]] __device__ inline void cuda_abort(const char *const message) {
#endif
[[noreturn]] __device__ static void cuda_abort(const char *const message) {
const char empty[] = "";
__assertfail((const void *)message, (const void *)empty, (unsigned int)0,
(const void *)empty, sizeof(char));
// This loop is never executed. It's intended to suppress warnings that the
// function returns, even though it does not. This is necessary because
// __assertfail is not marked as [[noreturn]], even though it does not return.
// Disable with KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK to workaround failures
// in random number generator unit tests with pre-volta architectures
#if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK)
while (true)
;
#endif
}
} // namespace Impl

View File

@ -48,8 +48,19 @@ class HIP {
using scratch_memory_space = ScratchMemorySpace<HIP>;
HIP();
HIP(hipStream_t stream,
Impl::ManageStream manage_stream = Impl::ManageStream::no);
explicit HIP(hipStream_t stream) : HIP(stream, Impl::ManageStream::no) {}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <typename T = void>
KOKKOS_DEPRECATED_WITH_COMMENT(
"HIP execution space should be constructed explicitly.")
HIP(hipStream_t stream)
: HIP(stream) {}
#endif
HIP(hipStream_t stream, Impl::ManageStream manage_stream);
KOKKOS_DEPRECATED HIP(hipStream_t stream, bool manage_stream);
//@}

View File

@ -22,8 +22,6 @@
#include <hip/hip_runtime.h>
#include <ostream>
namespace Kokkos {
namespace Impl {
@ -44,39 +42,4 @@ inline void hip_internal_safe_call(hipError_t e, const char* name,
#define KOKKOS_IMPL_HIP_SAFE_CALL(call) \
Kokkos::Impl::hip_internal_safe_call(call, #call, __FILE__, __LINE__)
namespace Kokkos {
namespace Experimental {
class HIPRawMemoryAllocationFailure : public RawMemoryAllocationFailure {
private:
hipError_t m_error_code = hipSuccess;
static FailureMode get_failure_mode(hipError_t error_code) {
switch (error_code) {
case hipErrorMemoryAllocation: return FailureMode::OutOfMemoryError;
case hipErrorInvalidValue: return FailureMode::InvalidAllocationSize;
default: return FailureMode::Unknown;
}
}
public:
HIPRawMemoryAllocationFailure(size_t arg_attempted_size,
hipError_t arg_error_code,
AllocationMechanism arg_mechanism) noexcept
: RawMemoryAllocationFailure(
arg_attempted_size, /* HIPSpace doesn't handle alignment? */ 1,
get_failure_mode(arg_error_code), arg_mechanism),
m_error_code(arg_error_code) {}
void append_additional_error_information(std::ostream& o) const override {
if (m_error_code != hipSuccess) {
o << " The HIP allocation returned the error code \""
<< hipGetErrorName(m_error_code) << "\".";
}
}
};
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -40,7 +40,7 @@ class GraphImpl<Kokkos::HIP> {
GraphNodeImpl<Kokkos::HIP, aggregate_kernel_impl_t,
Kokkos::Experimental::TypeErasedTag>;
// Not moveable or copyable; it spends its whole life as a shared_ptr in the
// Not movable or copyable; it spends its whole life as a shared_ptr in the
// Graph object.
GraphImpl() = delete;
GraphImpl(GraphImpl const&) = delete;
@ -108,7 +108,7 @@ inline void GraphImpl<Kokkos::HIP>::add_node(
}
// Requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl
// Also requires that the kernel has the graph node tag in it's policy
// Also requires that the kernel has the graph node tag in its policy
template <class NodeImpl>
inline void GraphImpl<Kokkos::HIP>::add_node(
std::shared_ptr<NodeImpl> const& arg_node_ptr) {

View File

@ -353,6 +353,22 @@ void HIPInternal::finalize() {
m_num_scratch_locks = 0;
}
int HIPInternal::m_hipDev = -1;
unsigned HIPInternal::m_multiProcCount = 0;
unsigned HIPInternal::m_maxWarpCount = 0;
std::array<HIPInternal::size_type, 3> HIPInternal::m_maxBlock = {0, 0, 0};
unsigned HIPInternal::m_maxWavesPerCU = 0;
int HIPInternal::m_shmemPerSM = 0;
int HIPInternal::m_maxShmemPerBlock = 0;
int HIPInternal::m_maxThreadsPerSM = 0;
hipDeviceProp_t HIPInternal::m_deviceProp;
std::mutex HIPInternal::scratchFunctorMutex;
unsigned long *HIPInternal::constantMemHostStaging = nullptr;
hipEvent_t HIPInternal::constantMemReusable = nullptr;
std::mutex HIPInternal::constantMemMutex;
//----------------------------------------------------------------------------
Kokkos::HIP::size_type hip_internal_multiprocessor_count() {

View File

@ -35,8 +35,7 @@ struct HIPTraits {
static constexpr int WarpSize = 64;
static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \
defined(KOKKOS_ARCH_AMD_GFX1103)
#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100)
static constexpr int WarpSize = 32;
static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */
static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/
@ -71,16 +70,16 @@ class HIPInternal {
public:
using size_type = ::Kokkos::HIP::size_type;
inline static int m_hipDev = -1;
inline static unsigned m_multiProcCount = 0;
inline static unsigned m_maxWarpCount = 0;
inline static std::array<size_type, 3> m_maxBlock = {0, 0, 0};
inline static unsigned m_maxWavesPerCU = 0;
inline static int m_shmemPerSM = 0;
inline static int m_maxShmemPerBlock = 0;
inline static int m_maxThreadsPerSM = 0;
static int m_hipDev;
static unsigned m_multiProcCount;
static unsigned m_maxWarpCount;
static std::array<size_type, 3> m_maxBlock;
static unsigned m_maxWavesPerCU;
static int m_shmemPerSM;
static int m_maxShmemPerBlock;
static int m_maxThreadsPerSM;
inline static hipDeviceProp_t m_deviceProp;
static hipDeviceProp_t m_deviceProp;
static int concurrency();
@ -93,7 +92,7 @@ class HIPInternal {
size_type *m_scratchFlags = nullptr;
mutable size_type *m_scratchFunctor = nullptr;
mutable size_type *m_scratchFunctorHost = nullptr;
inline static std::mutex scratchFunctorMutex;
static std::mutex scratchFunctorMutex;
hipStream_t m_stream = nullptr;
uint32_t m_instance_id =
@ -112,9 +111,9 @@ class HIPInternal {
// FIXME_HIP: these want to be per-device, not per-stream... use of 'static'
// here will break once there are multiple devices though
inline static unsigned long *constantMemHostStaging = nullptr;
inline static hipEvent_t constantMemReusable = nullptr;
inline static std::mutex constantMemMutex;
static unsigned long *constantMemHostStaging;
static hipEvent_t constantMemReusable;
static std::mutex constantMemMutex;
static HIPInternal &singleton();

View File

@ -50,6 +50,7 @@ class ParallelReduce<CombinedFunctorReducerType,
using value_type = typename ReducerType::value_type;
using reference_type = typename ReducerType::reference_type;
using functor_type = FunctorType;
using reducer_type = ReducerType;
using size_type = HIP::size_type;
// Conditionally set word_size_type to int16_t or int8_t if value_type is

View File

@ -31,7 +31,7 @@ template <class CombinedFunctorReducerType, class... Properties>
class ParallelReduce<CombinedFunctorReducerType,
Kokkos::TeamPolicy<Properties...>, HIP> {
public:
using Policy = TeamPolicyInternal<HIP, Properties...>;
using Policy = TeamPolicy<Properties...>;
using FunctorType = typename CombinedFunctorReducerType::functor_type;
using ReducerType = typename CombinedFunctorReducerType::reducer_type;
@ -46,6 +46,7 @@ class ParallelReduce<CombinedFunctorReducerType,
public:
using functor_type = FunctorType;
using reducer_type = ReducerType;
using size_type = HIP::size_type;
// static int constexpr UseShflReduction = false;

View File

@ -39,6 +39,7 @@
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace {
static std::atomic<bool> is_first_hip_managed_allocation(true);
@ -66,7 +67,6 @@ void* HIPSpace::allocate(
return impl_allocate(arg_label, arg_alloc_size, arg_logical_size);
}
void* HIPSpace::impl_allocate(
const char* arg_label, const size_t arg_alloc_size,
const size_t arg_logical_size,
const Kokkos::Tools::SpaceHandle arg_handle) const {
@ -77,10 +77,7 @@ void* HIPSpace::impl_allocate(
// This is the only way to clear the last error, which we should do here
// since we're turning it into an exception here
(void)hipGetLastError();
throw Experimental::HIPRawMemoryAllocationFailure(
arg_alloc_size, error_code,
Experimental::RawMemoryAllocationFailure::AllocationMechanism::
HIPMalloc);
Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label);
}
if (Kokkos::Profiling::profileLibraryLoaded()) {
const size_t reported_size =
@ -111,10 +108,7 @@ void* HIPHostPinnedSpace::impl_allocate(
// This is the only way to clear the last error, which we should do here
// since we're turning it into an exception here
(void)hipGetLastError();
throw Experimental::HIPRawMemoryAllocationFailure(
arg_alloc_size, error_code,
Experimental::RawMemoryAllocationFailure::AllocationMechanism::
HIPHostMalloc);
Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label);
}
if (Kokkos::Profiling::profileLibraryLoaded()) {
const size_t reported_size =
@ -178,10 +172,7 @@ Kokkos::HIP::runtime WARNING: Kokkos did not find an environment variable 'HSA_X
// This is the only way to clear the last error, which we should do here
// since we're turning it into an exception here
(void)hipGetLastError();
throw Experimental::HIPRawMemoryAllocationFailure(
arg_alloc_size, error_code,
Experimental::RawMemoryAllocationFailure::AllocationMechanism::
HIPMallocManaged);
Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label);
}
KOKKOS_IMPL_HIP_SAFE_CALL(hipMemAdvise(
ptr, arg_alloc_size, hipMemAdviseSetCoarseGrain, m_device));

View File

@ -153,7 +153,7 @@ void HPX::impl_instance_fence_locked(const std::string &name) const {
auto &s = impl_get_sender();
hpx::this_thread::experimental::sync_wait(std::move(s));
s = hpx::execution::experimental::unique_any_sender(
s = hpx::execution::experimental::unique_any_sender<>(
hpx::execution::experimental::just());
});
}
@ -184,7 +184,7 @@ void HPX::impl_static_fence(const std::string &name) {
}
hpx::this_thread::experimental::sync_wait(std::move(s));
s = hpx::execution::experimental::unique_any_sender(
s = hpx::execution::experimental::unique_any_sender<>(
hpx::execution::experimental::just());
});
}

View File

@ -168,17 +168,31 @@ class HPX {
: m_instance_data(Kokkos::Impl::HostSharedPtr<instance_data>(
&m_default_instance_data, &default_instance_deleter)) {}
~HPX() = default;
HPX(instance_mode mode)
explicit HPX(instance_mode mode)
: m_instance_data(
mode == instance_mode::independent
? (Kokkos::Impl::HostSharedPtr<instance_data>(
new instance_data(m_next_instance_id++)))
: Kokkos::Impl::HostSharedPtr<instance_data>(
&m_default_instance_data, &default_instance_deleter)) {}
HPX(hpx::execution::experimental::unique_any_sender<> &&sender)
explicit HPX(hpx::execution::experimental::unique_any_sender<> &&sender)
: m_instance_data(Kokkos::Impl::HostSharedPtr<instance_data>(
new instance_data(m_next_instance_id++, std::move(sender)))) {}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <typename T = void>
KOKKOS_DEPRECATED_WITH_COMMENT(
"HPX execution space should be constructed explicitly.")
HPX(instance_mode mode)
: HPX(mode) {}
template <typename T = void>
KOKKOS_DEPRECATED_WITH_COMMENT(
"HPX execution space should be constructed explicitly.")
HPX(hpx::execution::experimental::unique_any_sender<> &&sender)
: HPX(std::move(sender)) {}
#endif
HPX(HPX &&other) = default;
HPX(const HPX &other) = default;

View File

@ -29,7 +29,6 @@
#include <type_traits>
#include <algorithm>
#include <utility>
#include <limits>
#include <cstddef>
namespace Kokkos {
@ -80,7 +79,11 @@ struct ArrayBoundsCheck<Integral, false> {
/**\brief Derived from the C++17 'std::array'.
* Dropping the iterator interface.
*/
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <class T = void, size_t N = KOKKOS_INVALID_INDEX, class Proxy = void>
#else
template <class T, size_t N>
#endif
struct Array {
public:
/**
@ -129,10 +132,38 @@ struct Array {
KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const {
return &m_internal_implementation_private_member_data[0];
}
friend KOKKOS_FUNCTION constexpr bool operator==(Array const& lhs,
Array const& rhs) noexcept {
for (size_t i = 0; i != N; ++i)
if (lhs[i] != rhs[i]) return false;
return true;
}
friend KOKKOS_FUNCTION constexpr bool operator!=(Array const& lhs,
Array const& rhs) noexcept {
return !(lhs == rhs);
}
private:
template <class U = T>
friend KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t<
Impl::is_swappable<U>::value>
kokkos_swap(Array<T, N>& a,
Array<T, N>& b) noexcept(Impl::is_nothrow_swappable_v<U>) {
for (std::size_t i = 0; i < N; ++i) {
kokkos_swap(a[i], b[i]);
}
}
};
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <class T, class Proxy>
struct Array<T, 0, Proxy> {
#else
template <class T>
struct Array<T, 0> {
#endif
public:
using reference = T&;
using const_reference = std::add_const_t<T>&;
@ -167,25 +198,35 @@ struct Array<T, 0, Proxy> {
KOKKOS_INLINE_FUNCTION pointer data() { return nullptr; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return nullptr; }
KOKKOS_DEFAULTED_FUNCTION ~Array() = default;
KOKKOS_DEFAULTED_FUNCTION Array() = default;
KOKKOS_DEFAULTED_FUNCTION Array(const Array&) = default;
KOKKOS_DEFAULTED_FUNCTION Array& operator=(const Array&) = default;
friend KOKKOS_FUNCTION constexpr bool operator==(Array const&,
Array const&) noexcept {
return true;
}
friend KOKKOS_FUNCTION constexpr bool operator!=(Array const&,
Array const&) noexcept {
return false;
}
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
// Array( Array && ) = default ;
// Array & operator = ( Array && ) = default ;
private:
friend KOKKOS_INLINE_FUNCTION constexpr void kokkos_swap(
Array<T, 0>&, Array<T, 0>&) noexcept {}
};
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
namespace Impl {
struct KokkosArrayContiguous {};
struct KokkosArrayStrided {};
} // namespace Impl
template <>
struct Array<void, KOKKOS_INVALID_INDEX, void> {
struct contiguous {};
struct strided {};
struct KOKKOS_DEPRECATED Array<void, KOKKOS_INVALID_INDEX, void> {
using contiguous = Impl::KokkosArrayContiguous;
using strided = Impl::KokkosArrayStrided;
};
template <class T>
struct Array<T, KOKKOS_INVALID_INDEX, Array<>::contiguous> {
struct KOKKOS_DEPRECATED
Array<T, KOKKOS_INVALID_INDEX, Impl::KokkosArrayContiguous> {
private:
T* m_elem;
size_t m_size;
@ -253,7 +294,8 @@ struct Array<T, KOKKOS_INVALID_INDEX, Array<>::contiguous> {
};
template <class T>
struct Array<T, KOKKOS_INVALID_INDEX, Array<>::strided> {
struct KOKKOS_DEPRECATED
Array<T, KOKKOS_INVALID_INDEX, Impl::KokkosArrayStrided> {
private:
T* m_elem;
size_t m_size;
@ -320,10 +362,37 @@ struct Array<T, KOKKOS_INVALID_INDEX, Array<>::strided> {
size_type arg_stride)
: m_elem(arg_ptr), m_size(arg_size), m_stride(arg_stride) {}
};
#endif
template <typename T, typename... Us>
Array(T, Us...)->Array<T, 1 + sizeof...(Us)>;
namespace Impl {
template <typename T, size_t N, size_t... I>
KOKKOS_FUNCTION constexpr Array<std::remove_cv_t<T>, N> to_array_impl(
T (&a)[N], std::index_sequence<I...>) {
return {{a[I]...}};
}
template <typename T, size_t N, size_t... I>
KOKKOS_FUNCTION constexpr Array<std::remove_cv_t<T>, N> to_array_impl(
T(&&a)[N], std::index_sequence<I...>) {
return {{std::move(a[I])...}};
}
} // namespace Impl
template <typename T, size_t N>
KOKKOS_FUNCTION constexpr auto to_array(T (&a)[N]) {
return Impl::to_array_impl(a, std::make_index_sequence<N>{});
}
template <typename T, size_t N>
KOKKOS_FUNCTION constexpr auto to_array(T(&&a)[N]) {
return Impl::to_array_impl(std::move(a), std::make_index_sequence<N>{});
}
} // namespace Kokkos
//<editor-fold desc="Support for structured binding">
@ -333,6 +402,7 @@ struct std::tuple_size<Kokkos::Array<T, N>>
template <std::size_t I, class T, std::size_t N>
struct std::tuple_element<I, Kokkos::Array<T, N>> {
static_assert(I < N);
using type = T;
};
@ -340,21 +410,25 @@ namespace Kokkos {
template <std::size_t I, class T, std::size_t N>
KOKKOS_FUNCTION constexpr T& get(Array<T, N>& a) noexcept {
static_assert(I < N);
return a[I];
}
template <std::size_t I, class T, std::size_t N>
KOKKOS_FUNCTION constexpr T const& get(Array<T, N> const& a) noexcept {
static_assert(I < N);
return a[I];
}
template <std::size_t I, class T, std::size_t N>
KOKKOS_FUNCTION constexpr T&& get(Array<T, N>&& a) noexcept {
static_assert(I < N);
return std::move(a[I]);
}
template <std::size_t I, class T, std::size_t N>
KOKKOS_FUNCTION constexpr T const&& get(Array<T, N> const&& a) noexcept {
static_assert(I < N);
return std::move(a[I]);
}

View File

@ -22,7 +22,6 @@ static_assert(false,
#ifndef KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_
#define KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomics_Desul_Config.hpp>
#include <desul/atomics.hpp>
#ifdef KOKKOS_ENABLE_ATOMICS_BYPASS

View File

@ -22,8 +22,6 @@ static_assert(false,
#ifndef KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_
#define KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomics_Desul_Config.hpp>
#include <desul/atomics.hpp>
#include <impl/Kokkos_Volatile_Load.hpp>

View File

@ -28,6 +28,7 @@
#include <complex>
#include <type_traits>
#include <iosfwd>
#include <tuple>
namespace Kokkos {
@ -256,6 +257,12 @@ class
return *this;
}
template <size_t I, typename RT>
friend constexpr const RT& get(const complex<RT>&) noexcept;
template <size_t I, typename RT>
friend constexpr const RT&& get(const complex<RT>&&) noexcept;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
//! Copy constructor from volatile.
template <
@ -423,6 +430,75 @@ class
#endif // KOKKOS_ENABLE_DEPRECATED_CODE_4
};
} // namespace Kokkos
// Tuple protocol for complex based on https://wg21.link/P2819R2 (voted into
// the C++26 working draft on 2023-11)
template <typename RealType>
struct std::tuple_size<Kokkos::complex<RealType>>
: std::integral_constant<size_t, 2> {};
template <size_t I, typename RealType>
struct std::tuple_element<I, Kokkos::complex<RealType>> {
static_assert(I < 2);
using type = RealType;
};
namespace Kokkos {
// get<...>(...) defined here so as not to be hidden friends, as per P2819R2
template <size_t I, typename RealType>
KOKKOS_FUNCTION constexpr RealType& get(complex<RealType>& z) noexcept {
static_assert(I < 2);
if constexpr (I == 0)
return z.real();
else
return z.imag();
#ifdef KOKKOS_COMPILER_INTEL
__builtin_unreachable();
#endif
}
template <size_t I, typename RealType>
KOKKOS_FUNCTION constexpr RealType&& get(complex<RealType>&& z) noexcept {
static_assert(I < 2);
if constexpr (I == 0)
return std::move(z.real());
else
return std::move(z.imag());
#ifdef KOKKOS_COMPILER_INTEL
__builtin_unreachable();
#endif
}
template <size_t I, typename RealType>
KOKKOS_FUNCTION constexpr const RealType& get(
const complex<RealType>& z) noexcept {
static_assert(I < 2);
if constexpr (I == 0)
return z.re_;
else
return z.im_;
#ifdef KOKKOS_COMPILER_INTEL
__builtin_unreachable();
#endif
}
template <size_t I, typename RealType>
KOKKOS_FUNCTION constexpr const RealType&& get(
const complex<RealType>&& z) noexcept {
static_assert(I < 2);
if constexpr (I == 0)
return std::move(z.re_);
else
return std::move(z.im_);
#ifdef KOKKOS_COMPILER_INTEL
__builtin_unreachable();
#endif
}
//==============================================================================
// <editor-fold desc="Equality and inequality"> {{{1

View File

@ -221,10 +221,12 @@ struct ViewFill<ViewType, Layout, ExecSpace, 7, iType> {
ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_,
const ExecSpace& space)
: a(a_), val(val_) {
// MDRangePolicy is not supported for 7D views
// Iterate separately over extent(2)
Kokkos::parallel_for("Kokkos::ViewFill-7D",
policy_type(space, {0, 0, 0, 0, 0, 0},
{a.extent(0), a.extent(1), a.extent(2),
a.extent(3), a.extent(5), a.extent(6)}),
{a.extent(0), a.extent(1), a.extent(3),
a.extent(4), a.extent(5), a.extent(6)}),
*this);
}
@ -249,6 +251,8 @@ struct ViewFill<ViewType, Layout, ExecSpace, 8, iType> {
ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_,
const ExecSpace& space)
: a(a_), val(val_) {
// MDRangePolicy is not supported for 8D views
// Iterate separately over extent(2) and extent(4)
Kokkos::parallel_for("Kokkos::ViewFill-8D",
policy_type(space, {0, 0, 0, 0, 0, 0},
{a.extent(0), a.extent(1), a.extent(3),
@ -293,9 +297,11 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 2, iType> {
ViewTypeA a;
ViewTypeB b;
static const Kokkos::Iterate outer_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::outer_iteration_pattern;
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::inner_iteration_pattern;
using iterate_type =
Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>;
using policy_type =
@ -323,9 +329,11 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 3, iType> {
ViewTypeB b;
static const Kokkos::Iterate outer_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::outer_iteration_pattern;
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::inner_iteration_pattern;
using iterate_type =
Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>;
using policy_type =
@ -354,9 +362,11 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 4, iType> {
ViewTypeB b;
static const Kokkos::Iterate outer_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::outer_iteration_pattern;
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::inner_iteration_pattern;
using iterate_type =
Kokkos::Rank<4, outer_iteration_pattern, inner_iteration_pattern>;
using policy_type =
@ -386,9 +396,11 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 5, iType> {
ViewTypeB b;
static const Kokkos::Iterate outer_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::outer_iteration_pattern;
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::inner_iteration_pattern;
using iterate_type =
Kokkos::Rank<5, outer_iteration_pattern, inner_iteration_pattern>;
using policy_type =
@ -418,9 +430,11 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 6, iType> {
ViewTypeB b;
static const Kokkos::Iterate outer_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::outer_iteration_pattern;
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::inner_iteration_pattern;
using iterate_type =
Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>;
using policy_type =
@ -450,9 +464,11 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 7, iType> {
ViewTypeB b;
static const Kokkos::Iterate outer_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::outer_iteration_pattern;
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::inner_iteration_pattern;
using iterate_type =
Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>;
using policy_type =
@ -461,6 +477,8 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 7, iType> {
ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_,
const ExecSpace space = ExecSpace())
: a(a_), b(b_) {
// MDRangePolicy is not supported for 7D views
// Iterate separately over extent(2)
Kokkos::parallel_for("Kokkos::ViewCopy-7D",
policy_type(space, {0, 0, 0, 0, 0, 0},
{a.extent(0), a.extent(1), a.extent(3),
@ -483,9 +501,11 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 8, iType> {
ViewTypeB b;
static const Kokkos::Iterate outer_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::outer_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::outer_iteration_pattern;
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::layout_iterate_type_selector<Layout>::inner_iteration_pattern;
Kokkos::Impl::layout_iterate_type_selector<
Layout>::inner_iteration_pattern;
using iterate_type =
Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>;
using policy_type =
@ -494,6 +514,8 @@ struct ViewCopy<ViewTypeA, ViewTypeB, Layout, ExecSpace, 8, iType> {
ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_,
const ExecSpace space = ExecSpace())
: a(a_), b(b_) {
// MDRangePolicy is not supported for 8D views
// Iterate separately over extent(2) and extent(4)
Kokkos::parallel_for("Kokkos::ViewCopy-8D",
policy_type(space, {0, 0, 0, 0, 0, 0},
{a.extent(0), a.extent(1), a.extent(3),
@ -539,11 +561,8 @@ void view_copy(const ExecutionSpace& space, const DstType& dst,
int64_t strides[DstType::rank + 1];
dst.stride(strides);
Kokkos::Iterate iterate;
if (Kokkos::is_layouttiled<typename DstType::array_layout>::value) {
iterate = Kokkos::layout_iterate_type_selector<
typename DstType::array_layout>::outer_iteration_pattern;
} else if (std::is_same<typename DstType::array_layout,
Kokkos::LayoutRight>::value) {
if (std::is_same<typename DstType::array_layout,
Kokkos::LayoutRight>::value) {
iterate = Kokkos::Iterate::Right;
} else if (std::is_same<typename DstType::array_layout,
Kokkos::LayoutLeft>::value) {
@ -630,11 +649,8 @@ void view_copy(const DstType& dst, const SrcType& src) {
int64_t strides[DstType::rank + 1];
dst.stride(strides);
Kokkos::Iterate iterate;
if (Kokkos::is_layouttiled<typename DstType::array_layout>::value) {
iterate = Kokkos::layout_iterate_type_selector<
typename DstType::array_layout>::outer_iteration_pattern;
} else if (std::is_same<typename DstType::array_layout,
Kokkos::LayoutRight>::value) {
if (std::is_same<typename DstType::array_layout,
Kokkos::LayoutRight>::value) {
iterate = Kokkos::Iterate::Right;
} else if (std::is_same<typename DstType::array_layout,
Kokkos::LayoutLeft>::value) {
@ -3092,8 +3108,7 @@ inline std::enable_if_t<
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutStride>::value ||
is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value>
Kokkos::LayoutStride>::value>
impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
Kokkos::View<T, P...>& v,
const typename Kokkos::View<T, P...>::array_layout& layout) {
@ -3139,8 +3154,7 @@ inline std::enable_if_t<
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutStride>::value ||
is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value)>
Kokkos::LayoutStride>::value)>
impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
Kokkos::View<T, P...>& v,
const typename Kokkos::View<T, P...>::array_layout& layout) {
@ -3235,7 +3249,10 @@ impl_realloc(Kokkos::View<T, P...>& v, const size_t n0, const size_t n1,
v = view_type(); // Best effort to deallocate in case no other view refers
// to the shared allocation
v = view_type(arg_prop_copy, n0, n1, n2, n3, n4, n5, n6, n7);
} else if (alloc_prop_input::initialize) {
return;
}
if constexpr (alloc_prop_input::initialize) {
if constexpr (alloc_prop_input::has_execution_space) {
const auto& exec_space =
Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop);
@ -3308,8 +3325,7 @@ inline std::enable_if_t<
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutStride>::value ||
is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value>
Kokkos::LayoutStride>::value>
impl_realloc(Kokkos::View<T, P...>& v,
const typename Kokkos::View<T, P...>::array_layout& layout,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
@ -3331,7 +3347,10 @@ impl_realloc(Kokkos::View<T, P...>& v,
if (v.layout() != layout) {
v = view_type(); // Deallocate first, if the only view to allocation
v = view_type(arg_prop, layout);
} else if (alloc_prop_input::initialize) {
return;
}
if constexpr (alloc_prop_input::initialize) {
if constexpr (alloc_prop_input::has_execution_space) {
const auto& exec_space =
Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop);
@ -3351,8 +3370,7 @@ inline std::enable_if_t<
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename Kokkos::View<T, P...>::array_layout,
Kokkos::LayoutStride>::value ||
is_layouttiled<typename Kokkos::View<T, P...>::array_layout>::value)>
Kokkos::LayoutStride>::value)>
impl_realloc(Kokkos::View<T, P...>& v,
const typename Kokkos::View<T, P...>::array_layout& layout,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
@ -3452,6 +3470,7 @@ struct MirrorType {
using view_type = Kokkos::View<data_type, array_layout, Space>;
};
// collection of static asserts for create_mirror and create_mirror_view
template <class... ViewCtorArgs>
void check_view_ctor_args_create_mirror() {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
@ -3470,232 +3489,231 @@ void check_view_ctor_args_create_mirror() {
"not explicitly allow padding!");
}
// create a mirror
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space,
typename Kokkos::View<T, P...>::HostMirror>
create_mirror(const Kokkos::View<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
using src_type = View<T, P...>;
using dst_type = typename src_type::HostMirror;
inline auto create_mirror(const Kokkos::View<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
return dst_type(prop_copy, src.layout());
}
// Create a mirror in a new space (specialization for different space)
template <class T, class... P, class... ViewCtorArgs,
class Enable = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
auto create_mirror(const Kokkos::View<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
using alloc_prop = decltype(prop_copy);
return typename Impl::MirrorType<typename alloc_prop::memory_space, T,
P...>::view_type(prop_copy, src.layout());
if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
using memory_space = typename decltype(prop_copy)::memory_space;
using dst_type =
typename Impl::MirrorType<memory_space, T, P...>::view_type;
return dst_type(prop_copy, src.layout());
} else {
using dst_type = typename View<T, P...>::HostMirror;
return dst_type(prop_copy, src.layout());
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
} // namespace Impl
template <class T, class... P>
std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value,
typename Kokkos::View<T, P...>::HostMirror>
create_mirror(Kokkos::View<T, P...> const& v) {
return Impl::create_mirror(v, Impl::ViewCtorProp<>{});
// public interface
template <class T, class... P,
typename = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror(Kokkos::View<T, P...> const& src) {
return Impl::create_mirror(src, Impl::ViewCtorProp<>{});
}
template <class T, class... P>
std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value,
typename Kokkos::View<T, P...>::HostMirror>
create_mirror(Kokkos::Impl::WithoutInitializing_t wi,
Kokkos::View<T, P...> const& v) {
return Impl::create_mirror(v, view_alloc(wi));
// public interface that accepts a without initializing flag
template <class T, class... P,
typename = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi,
Kokkos::View<T, P...> const& src) {
return Impl::create_mirror(src, view_alloc(wi));
}
// public interface that accepts a space
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value,
typename Impl::MirrorType<Space, T, P...>::view_type>
create_mirror(Space const&, Kokkos::View<T, P...> const& v) {
return Impl::create_mirror(v, view_alloc(typename Space::memory_space{}));
}
template <class T, class... P, class... ViewCtorArgs,
typename Enable = std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror(Space const&, Kokkos::View<T, P...> const& src) {
return Impl::create_mirror(src, view_alloc(typename Space::memory_space{}));
}
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs,
typename = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror(Impl::ViewCtorProp<ViewCtorArgs...> const& arg_prop,
Kokkos::View<T, P...> const& v) {
return Impl::create_mirror(v, arg_prop);
}
template <class T, class... P, class... ViewCtorArgs>
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space,
typename Kokkos::View<T, P...>::HostMirror>
create_mirror(Impl::ViewCtorProp<ViewCtorArgs...> const& arg_prop,
Kokkos::View<T, P...> const& v) {
return Impl::create_mirror(v, arg_prop);
Kokkos::View<T, P...> const& src) {
return Impl::create_mirror(src, arg_prop);
}
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
std::enable_if_t<std::is_void<typename ViewTraits<T, P...>::specialize>::value,
typename Impl::MirrorType<Space, T, P...>::view_type>
create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const&,
Kokkos::View<T, P...> const& v) {
return Impl::create_mirror(v, view_alloc(typename Space::memory_space{}, wi));
typename Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const&,
Kokkos::View<T, P...> const& src) {
return Impl::create_mirror(src,
view_alloc(typename Space::memory_space{}, wi));
}
namespace Impl {
// choose a `Kokkos::create_mirror` adapted for the provided view and the
// provided arguments
template <class View, class... ViewCtorArgs>
inline auto choose_create_mirror(
const View& src, const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
// Due to the fact that users can overload `Kokkos::create_mirror`, but also
// that they may not have implemented all of its different possible
// variations, this function chooses the correct private or public version of
// it to call.
// This helper should be used by any overload of
// `Kokkos::Impl::create_mirror_view`.
if constexpr (std::is_void_v<typename View::traits::specialize>) {
// if the view is not specialized, just call the Impl function
// using ADL to find the later defined overload of the function
using namespace Kokkos::Impl;
return create_mirror(src, arg_prop);
} else {
// otherwise, recreate the public call
using ViewProp = Impl::ViewCtorProp<ViewCtorArgs...>;
// using ADL to find the later defined overload of the function
using namespace Kokkos;
if constexpr (sizeof...(ViewCtorArgs) == 0) {
// if there are no view constructor args, call the specific public
// function
return create_mirror(src);
} else if constexpr (sizeof...(ViewCtorArgs) == 1 &&
ViewProp::has_memory_space) {
// if there is one view constructor arg and it has a memory space, call
// the specific public function
return create_mirror(typename ViewProp::memory_space{}, src);
} else if constexpr (sizeof...(ViewCtorArgs) == 1 &&
!ViewProp::initialize) {
// if there is one view constructor arg and it has a without initializing
// mark, call the specific public function
return create_mirror(typename Kokkos::Impl::WithoutInitializing_t{}, src);
} else if constexpr (sizeof...(ViewCtorArgs) == 2 &&
ViewProp::has_memory_space && !ViewProp::initialize) {
// if there is two view constructor args and they have a memory space and
// a without initializing mark, call the specific public function
return create_mirror(typename Kokkos::Impl::WithoutInitializing_t{},
typename ViewProp::memory_space{}, src);
} else {
// if there are other constructor args, call the generic public function
// Beware, there are some libraries using Kokkos that don't implement
// this overload (hence the reason for this present function to exist).
return create_mirror(arg_prop, src);
}
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
// create a mirror view
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
(std::is_same<
typename Kokkos::View<T, P...>::memory_space,
typename Kokkos::View<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::View<T, P...>::data_type,
typename Kokkos::View<T, P...>::HostMirror::data_type>::value),
typename Kokkos::View<T, P...>::HostMirror>
create_mirror_view(const Kokkos::View<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
return src;
}
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
!(std::is_same<typename Kokkos::View<T, P...>::memory_space,
typename Kokkos::View<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::View<T, P...>::data_type,
typename Kokkos::View<T, P...>::HostMirror::data_type>::value),
typename Kokkos::View<T, P...>::HostMirror>
create_mirror_view(const Kokkos::View<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
}
// Create a mirror view in a new space (specialization for same space)
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<Impl::MirrorViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::View<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
return src;
}
// Create a mirror view in a new space (specialization for different space)
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<!Impl::MirrorViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::View<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
inline auto create_mirror_view(
const Kokkos::View<T, P...>& src,
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
if constexpr (std::is_same<typename Kokkos::View<T, P...>::memory_space,
typename Kokkos::View<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename Kokkos::View<T, P...>::data_type,
typename Kokkos::View<
T, P...>::HostMirror::data_type>::value) {
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
return typename Kokkos::View<T, P...>::HostMirror(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
} else {
if constexpr (Impl::MirrorViewType<typename Impl::ViewCtorProp<
ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
return typename Impl::MirrorViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
} // namespace Impl
// public interface
template <class T, class... P>
std::enable_if_t<
std::is_same<
typename Kokkos::View<T, P...>::memory_space,
typename Kokkos::View<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::View<T, P...>::data_type,
typename Kokkos::View<T, P...>::HostMirror::data_type>::value,
typename Kokkos::View<T, P...>::HostMirror>
create_mirror_view(const Kokkos::View<T, P...>& src) {
return src;
auto create_mirror_view(const Kokkos::View<T, P...>& src) {
return Impl::create_mirror_view(src, view_alloc());
}
// public interface that accepts a without initializing flag
template <class T, class... P>
std::enable_if_t<
!(std::is_same<
typename Kokkos::View<T, P...>::memory_space,
typename Kokkos::View<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::View<T, P...>::data_type,
typename Kokkos::View<T, P...>::HostMirror::data_type>::value),
typename Kokkos::View<T, P...>::HostMirror>
create_mirror_view(const Kokkos::View<T, P...>& src) {
return Kokkos::create_mirror(src);
auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
Kokkos::View<T, P...> const& src) {
return Impl::create_mirror_view(src, view_alloc(wi));
}
template <class T, class... P>
typename Kokkos::View<T, P...>::HostMirror create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi, Kokkos::View<T, P...> const& v) {
return Impl::create_mirror_view(v, view_alloc(wi));
}
// FIXME_C++17 Improve SFINAE here.
// public interface that accepts a space
template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
const Space&, const Kokkos::View<T, P...>& src,
std::enable_if_t<Impl::MirrorViewType<Space, T, P...>::is_same_memspace>* =
nullptr) {
return src;
}
// FIXME_C++17 Improve SFINAE here.
template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
const Space& space, const Kokkos::View<T, P...>& src,
std::enable_if_t<!Impl::MirrorViewType<Space, T, P...>::is_same_memspace>* =
nullptr) {
return Kokkos::create_mirror(space, src);
auto create_mirror_view(const Space&, const Kokkos::View<T, P...>& src) {
return Impl::create_mirror_view(src,
view_alloc(typename Space::memory_space()));
}
// public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
typename Impl::MirrorViewType<Space, T, P...>::view_type create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi, Space const&,
Kokkos::View<T, P...> const& v) {
auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, Space const&,
Kokkos::View<T, P...> const& src) {
return Impl::create_mirror_view(
v, view_alloc(typename Space::memory_space{}, wi));
src, view_alloc(typename Space::memory_space{}, wi));
}
template <class T, class... P, class... ViewCtorArgs>
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs,
typename = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror_view(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::View<T, P...>& v) {
return Impl::create_mirror_view(v, arg_prop);
const Kokkos::View<T, P...>& src) {
return Impl::create_mirror_view(src, arg_prop);
}
template <class... ViewCtorArgs, class T, class... P>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>&,
const Kokkos::View<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
Impl::MirrorViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
namespace Impl {
// collection of static asserts for create_mirror_view_and_copy
template <class... ViewCtorArgs>
void check_view_ctor_args_create_mirror_view_and_copy() {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
@ -3708,52 +3726,53 @@ auto create_mirror_view_and_copy(
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
// same behavior as deep_copy(src, src)
if (!alloc_prop_input::has_execution_space)
fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src view");
return src;
}
template <class... ViewCtorArgs, class T, class... P>
} // namespace Impl
// create a mirror view and deep copy it
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class... ViewCtorArgs, class T, class... P,
class Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::View<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::MirrorViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::View<T, P...>& src) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
using Space = typename alloc_prop_input::memory_space;
using Mirror = typename Impl::MirrorViewType<Space, T, P...>::view_type;
auto arg_prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string{}, WithoutInitializing,
typename Space::execution_space{});
Impl::check_view_ctor_args_create_mirror_view_and_copy<ViewCtorArgs...>();
std::string& label = Impl::get_property<Impl::LabelTag>(arg_prop_copy);
if (label.empty()) label = src.label();
auto mirror = typename Mirror::non_const_type{arg_prop_copy, src.layout()};
if constexpr (alloc_prop_input::has_execution_space) {
deep_copy(Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop_copy),
mirror, src);
} else
deep_copy(mirror, src);
return mirror;
if constexpr (Impl::MirrorViewType<typename alloc_prop_input::memory_space, T,
P...>::is_same_memspace) {
// same behavior as deep_copy(src, src)
if constexpr (!alloc_prop_input::has_execution_space)
fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src "
"view");
return src;
} else {
using Space = typename alloc_prop_input::memory_space;
using Mirror = typename Impl::MirrorViewType<Space, T, P...>::view_type;
auto arg_prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string{}, WithoutInitializing,
typename Space::execution_space{});
std::string& label = Impl::get_property<Impl::LabelTag>(arg_prop_copy);
if (label.empty()) label = src.label();
auto mirror = typename Mirror::non_const_type{arg_prop_copy, src.layout()};
if constexpr (alloc_prop_input::has_execution_space) {
deep_copy(Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop_copy),
mirror, src);
} else
deep_copy(mirror, src);
return mirror;
}
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC)
__builtin_unreachable();
#endif
}
// Previously when using auto here, the intel compiler 19.3 would

View File

@ -40,7 +40,12 @@ struct ParallelReduceTag {};
struct ChunkSize {
int value;
explicit ChunkSize(int value_) : value(value_) {}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <typename T = void>
KOKKOS_DEPRECATED_WITH_COMMENT("ChunkSize should be constructed explicitly.")
ChunkSize(int value_) : value(value_) {}
#endif
};
/** \brief Execution policy for work over a range of an integral type.
@ -714,6 +719,58 @@ class TeamPolicy
}
};
// Execution space not provided deduces to TeamPolicy<>
TeamPolicy()->TeamPolicy<>;
TeamPolicy(int, int)->TeamPolicy<>;
TeamPolicy(int, int, int)->TeamPolicy<>;
TeamPolicy(int, Kokkos::AUTO_t const&)->TeamPolicy<>;
TeamPolicy(int, Kokkos::AUTO_t const&, int)->TeamPolicy<>;
TeamPolicy(int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)->TeamPolicy<>;
TeamPolicy(int, int, Kokkos::AUTO_t const&)->TeamPolicy<>;
// DefaultExecutionSpace deduces to TeamPolicy<>
TeamPolicy(DefaultExecutionSpace const&, int, int)->TeamPolicy<>;
TeamPolicy(DefaultExecutionSpace const&, int, int, int)->TeamPolicy<>;
TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&)
->TeamPolicy<>;
TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, int)
->TeamPolicy<>;
TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&,
Kokkos::AUTO_t const&)
->TeamPolicy<>;
TeamPolicy(DefaultExecutionSpace const&, int, int, Kokkos::AUTO_t const&)
->TeamPolicy<>;
// ES != DefaultExecutionSpace deduces to TeamPolicy<ES>
template <typename ES,
typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
TeamPolicy(ES const&, int, int)->TeamPolicy<ES>;
template <typename ES,
typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
TeamPolicy(ES const&, int, int, int)->TeamPolicy<ES>;
template <typename ES,
typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
TeamPolicy(ES const&, int, Kokkos::AUTO_t const&)->TeamPolicy<ES>;
template <typename ES,
typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, int)->TeamPolicy<ES>;
template <typename ES,
typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)
->TeamPolicy<ES>;
template <typename ES,
typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
TeamPolicy(ES const&, int, int, Kokkos::AUTO_t const&)->TeamPolicy<ES>;
namespace Impl {
template <typename iType, class TeamMemberType>
@ -968,9 +1025,9 @@ struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
static constexpr Iterate direction =
OuterDir == Iterate::Default
? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
: iter;
OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
ArrayLayout>::outer_iteration_pattern
: iter;
template <class... Args>
KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args)
@ -983,7 +1040,7 @@ struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
};
template <typename TeamHandle, typename... Args>
TeamThreadMDRange(TeamHandle const&, Args&&...)
KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle const&, Args&&...)
->TeamThreadMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
template <typename Rank, typename TeamHandle>
@ -1004,9 +1061,9 @@ struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
static constexpr Iterate direction =
OuterDir == Iterate::Default
? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
: iter;
OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
ArrayLayout>::outer_iteration_pattern
: iter;
template <class... Args>
KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_,
@ -1020,7 +1077,7 @@ struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
};
template <typename TeamHandle, typename... Args>
ThreadVectorMDRange(TeamHandle const&, Args&&...)
KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle const&, Args&&...)
->ThreadVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
template <typename Rank, typename TeamHandle>
@ -1041,9 +1098,9 @@ struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
static constexpr Iterate direction =
iter == Iterate::Default
? layout_iterate_type_selector<ArrayLayout>::outer_iteration_pattern
: iter;
iter == Iterate::Default ? Impl::layout_iterate_type_selector<
ArrayLayout>::outer_iteration_pattern
: iter;
template <class... Args>
KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_,
@ -1057,7 +1114,7 @@ struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
};
template <typename TeamHandle, typename... Args>
TeamVectorMDRange(TeamHandle const&, Args&&...)
KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle const&, Args&&...)
->TeamVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
template <typename Rank, typename TeamHandle, typename Lambda,

View File

@ -25,33 +25,40 @@ static_assert(false,
#include <cstddef>
#include <type_traits>
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
#include <mdspan/mdspan.hpp>
#else
#include <limits>
#endif
namespace Kokkos {
#ifndef KOKKOS_ENABLE_IMPL_MDSPAN
constexpr size_t dynamic_extent = std::numeric_limits<size_t>::max();
#endif
namespace Experimental {
constexpr ptrdiff_t dynamic_extent = -1;
template <ptrdiff_t... ExtentSpecs>
template <size_t... ExtentSpecs>
struct Extents {
/* TODO @enhancement flesh this out more */
};
template <class Exts, ptrdiff_t NewExtent>
template <class Exts, size_t NewExtent>
struct PrependExtent;
template <ptrdiff_t... Exts, ptrdiff_t NewExtent>
template <size_t... Exts, size_t NewExtent>
struct PrependExtent<Extents<Exts...>, NewExtent> {
using type = Extents<NewExtent, Exts...>;
};
template <class Exts, ptrdiff_t NewExtent>
template <class Exts, size_t NewExtent>
struct AppendExtent;
template <ptrdiff_t... Exts, ptrdiff_t NewExtent>
template <size_t... Exts, size_t NewExtent>
struct AppendExtent<Extents<Exts...>, NewExtent> {
using type = Extents<Exts..., NewExtent>;
};
} // end namespace Experimental
namespace Impl {
@ -75,33 +82,32 @@ struct _parse_impl {
// We have to treat the case of int**[x] specially, since it *doesn't* go
// backwards
template <class T, ptrdiff_t... ExtentSpec>
template <class T, size_t... ExtentSpec>
struct _parse_impl<T*, Kokkos::Experimental::Extents<ExtentSpec...>,
std::enable_if_t<_all_remaining_extents_dynamic<T>::value>>
: _parse_impl<T, Kokkos::Experimental::Extents<
Kokkos::Experimental::dynamic_extent, ExtentSpec...>> {
};
: _parse_impl<T, Kokkos::Experimental::Extents<Kokkos::dynamic_extent,
ExtentSpec...>> {};
// int*(*[x])[y] should still work also (meaning int[][x][][y])
template <class T, ptrdiff_t... ExtentSpec>
template <class T, size_t... ExtentSpec>
struct _parse_impl<
T*, Kokkos::Experimental::Extents<ExtentSpec...>,
std::enable_if_t<!_all_remaining_extents_dynamic<T>::value>> {
using _next = Kokkos::Experimental::AppendExtent<
typename _parse_impl<T, Kokkos::Experimental::Extents<ExtentSpec...>,
void>::type,
Kokkos::Experimental::dynamic_extent>;
Kokkos::dynamic_extent>;
using type = typename _next::type;
};
template <class T, ptrdiff_t... ExtentSpec, unsigned N>
template <class T, size_t... ExtentSpec, unsigned N>
struct _parse_impl<T[N], Kokkos::Experimental::Extents<ExtentSpec...>, void>
: _parse_impl<
T, Kokkos::Experimental::Extents<ExtentSpec...,
ptrdiff_t(N)> // TODO @pedantic this
// could be a
// narrowing cast
> {};
: _parse_impl<T,
Kokkos::Experimental::Extents<ExtentSpec...,
size_t(N)> // TODO @pedantic
// this could be a
// narrowing cast
> {};
} // end namespace _parse_view_extents_impl
@ -111,38 +117,34 @@ struct ParseViewExtents {
DataType, Kokkos::Experimental::Extents<>>::type;
};
template <class ValueType, ptrdiff_t Ext>
template <class ValueType, size_t Ext>
struct ApplyExtent {
using type = ValueType[Ext];
};
template <class ValueType>
struct ApplyExtent<ValueType, Kokkos::Experimental::dynamic_extent> {
struct ApplyExtent<ValueType, Kokkos::dynamic_extent> {
using type = ValueType*;
};
template <class ValueType, unsigned N, ptrdiff_t Ext>
template <class ValueType, unsigned N, size_t Ext>
struct ApplyExtent<ValueType[N], Ext> {
using type = typename ApplyExtent<ValueType, Ext>::type[N];
};
template <class ValueType, ptrdiff_t Ext>
template <class ValueType, size_t Ext>
struct ApplyExtent<ValueType*, Ext> {
using type = ValueType * [Ext];
};
template <class ValueType>
struct ApplyExtent<ValueType*, Kokkos::Experimental::dynamic_extent> {
using type =
typename ApplyExtent<ValueType,
Kokkos::Experimental::dynamic_extent>::type*;
struct ApplyExtent<ValueType*, dynamic_extent> {
using type = typename ApplyExtent<ValueType, dynamic_extent>::type*;
};
template <class ValueType, unsigned N>
struct ApplyExtent<ValueType[N], Kokkos::Experimental::dynamic_extent> {
using type =
typename ApplyExtent<ValueType,
Kokkos::Experimental::dynamic_extent>::type[N];
struct ApplyExtent<ValueType[N], dynamic_extent> {
using type = typename ApplyExtent<ValueType, dynamic_extent>::type[N];
};
} // end namespace Impl

View File

@ -167,6 +167,9 @@ Graph<ExecutionSpace> create_graph(Closure&& arg_closure) {
#include <HIP/Kokkos_HIP_Graph_Impl.hpp>
#endif
#endif
#ifdef SYCL_EXT_ONEAPI_GRAPH
#include <SYCL/Kokkos_SYCL_Graph_Impl.hpp>
#endif
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH

View File

@ -113,7 +113,6 @@ class HostSpace {
const size_t arg_alloc_size,
const size_t arg_logical_size = 0) const;
private:
void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
const size_t arg_logical_size = 0,
const Kokkos::Tools::SpaceHandle =
@ -124,7 +123,6 @@ class HostSpace {
const Kokkos::Tools::SpaceHandle =
Kokkos::Tools::make_space_handle(name())) const;
public:
/**\brief Return Name of the MemorySpace */
static constexpr const char* name() { return m_name; }

View File

@ -217,81 +217,12 @@ enum class Iterate {
Right // Right indices stride fastest
};
// To check for LayoutTiled
// This is to hide extra compile-time 'identifier' info within the LayoutTiled
// class by not relying on template specialization to include the ArgN*'s
template <typename LayoutTiledCheck, class Enable = void>
struct is_layouttiled : std::false_type {};
template <typename LayoutTiledCheck>
struct is_layouttiled<LayoutTiledCheck,
std::enable_if_t<LayoutTiledCheck::is_array_layout_tiled>>
: std::true_type {};
namespace Experimental {
/// LayoutTiled
// Must have Rank >= 2
template <
Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0,
unsigned ArgN1, unsigned ArgN2 = 0, unsigned ArgN3 = 0, unsigned ArgN4 = 0,
unsigned ArgN5 = 0, unsigned ArgN6 = 0, unsigned ArgN7 = 0,
bool IsPowerOfTwo =
(Kokkos::Impl::is_integral_power_of_two(ArgN0) &&
Kokkos::Impl::is_integral_power_of_two(ArgN1) &&
(Kokkos::Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0)) &&
(Kokkos::Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0)) &&
(Kokkos::Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0)) &&
(Kokkos::Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0)) &&
(Kokkos::Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0)) &&
(Kokkos::Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0)))>
struct LayoutTiled {
static_assert(IsPowerOfTwo,
"LayoutTiled must be given power-of-two tile dimensions");
using array_layout = LayoutTiled<OuterP, InnerP, ArgN0, ArgN1, ArgN2, ArgN3,
ArgN4, ArgN5, ArgN6, ArgN7, IsPowerOfTwo>;
static constexpr Iterate outer_pattern = OuterP;
static constexpr Iterate inner_pattern = InnerP;
enum { N0 = ArgN0 };
enum { N1 = ArgN1 };
enum { N2 = ArgN2 };
enum { N3 = ArgN3 };
enum { N4 = ArgN4 };
enum { N5 = ArgN5 };
enum { N6 = ArgN6 };
enum { N7 = ArgN7 };
size_t dimension[ARRAY_LAYOUT_MAX_RANK];
enum : bool { is_extent_constructible = true };
LayoutTiled(LayoutTiled const&) = default;
LayoutTiled(LayoutTiled&&) = default;
LayoutTiled& operator=(LayoutTiled const&) = default;
LayoutTiled& operator=(LayoutTiled&&) = default;
KOKKOS_INLINE_FUNCTION
explicit constexpr LayoutTiled(size_t argN0 = 0, size_t argN1 = 0,
size_t argN2 = 0, size_t argN3 = 0,
size_t argN4 = 0, size_t argN5 = 0,
size_t argN6 = 0, size_t argN7 = 0)
: dimension{argN0, argN1, argN2, argN3, argN4, argN5, argN6, argN7} {}
friend bool operator==(const LayoutTiled& left, const LayoutTiled& right) {
for (unsigned int rank = 0; rank < ARRAY_LAYOUT_MAX_RANK; ++rank)
if (left.dimension[rank] != right.dimension[rank]) return false;
return true;
}
friend bool operator!=(const LayoutTiled& left, const LayoutTiled& right) {
return !(left == right);
}
};
} // namespace Experimental
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <typename Layout, class Enable = void>
struct KOKKOS_DEPRECATED is_layouttiled : std::false_type {};
#endif
namespace Impl {
// For use with view_copy
template <typename... Layout>
struct layout_iterate_type_selector {
@ -320,42 +251,13 @@ struct layout_iterate_type_selector<Kokkos::LayoutStride> {
static const Kokkos::Iterate inner_iteration_pattern =
Kokkos::Iterate::Default;
};
} // namespace Impl
template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3,
unsigned ArgN4, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7>
struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled<
Kokkos::Iterate::Left, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3,
ArgN4, ArgN5, ArgN6, ArgN7, true>> {
static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left;
static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left;
};
template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3,
unsigned ArgN4, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7>
struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled<
Kokkos::Iterate::Right, Kokkos::Iterate::Left, ArgN0, ArgN1, ArgN2, ArgN3,
ArgN4, ArgN5, ArgN6, ArgN7, true>> {
static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right;
static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left;
};
template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3,
unsigned ArgN4, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7>
struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled<
Kokkos::Iterate::Left, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3,
ArgN4, ArgN5, ArgN6, ArgN7, true>> {
static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left;
static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right;
};
template <unsigned ArgN0, unsigned ArgN1, unsigned ArgN2, unsigned ArgN3,
unsigned ArgN4, unsigned ArgN5, unsigned ArgN6, unsigned ArgN7>
struct layout_iterate_type_selector<Kokkos::Experimental::LayoutTiled<
Kokkos::Iterate::Right, Kokkos::Iterate::Right, ArgN0, ArgN1, ArgN2, ArgN3,
ArgN4, ArgN5, ArgN6, ArgN7, true>> {
static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right;
static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right;
};
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <typename... Layout>
using layout_iterate_type_selector KOKKOS_DEPRECATED =
Impl::layout_iterate_type_selector<Layout...>;
#endif
} // namespace Kokkos

View File

@ -55,9 +55,22 @@
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#include <impl/Kokkos_DesulAtomicsConfig.hpp>
#include <impl/Kokkos_NvidiaGpuArchitectures.hpp>
#endif
#if !defined(KOKKOS_ENABLE_CXX17)
#if __has_include(<version>)
#include <version>
#else
#include <ciso646>
#endif
#if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 10
#error \
"Compiling with support for C++20 or later requires a libstdc++ version later than 9"
#endif
#endif
//----------------------------------------------------------------------------
/** Pick up compiler specific #define macros:
*
@ -332,6 +345,10 @@
#define KOKKOS_DEFAULTED_FUNCTION
#endif
#if !defined(KOKKOS_DEDUCTION_GUIDE)
#define KOKKOS_DEDUCTION_GUIDE
#endif
#if !defined(KOKKOS_IMPL_HOST_FUNCTION)
#define KOKKOS_IMPL_HOST_FUNCTION
#endif
@ -562,8 +579,44 @@ static constexpr bool kokkos_omp_on_host() { return false; }
#define KOKKOS_IMPL_WARNING(desc) KOKKOS_IMPL_DO_PRAGMA(message(#desc))
#endif
// clang-format off
#if defined(__NVCOMPILER)
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \
_Pragma("diag_suppress 1216")
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \
_Pragma("diag_default 1216")
#elif defined(__EDG__)
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \
_Pragma("warning push") \
_Pragma("warning disable 1478")
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \
_Pragma("warning pop")
#elif defined(__GNUC__) || defined(__clang__)
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \
_Pragma("GCC diagnostic pop")
#elif defined(_MSC_VER)
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \
_Pragma("warning(push)") \
_Pragma("warning(disable: 4996)")
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \
_Pragma("warning(pop)")
#else
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
#define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
#endif
// clang-format on
#define KOKKOS_ATTRIBUTE_NODISCARD [[nodiscard]]
#ifndef KOKKOS_ENABLE_CXX17
#define KOKKOS_IMPL_ATTRIBUTE_UNLIKELY [[unlikely]]
#else
#define KOKKOS_IMPL_ATTRIBUTE_UNLIKELY
#endif
#if (defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) || \
defined(KOKKOS_COMPILER_INTEL) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \
defined(KOKKOS_COMPILER_NVHPC)) && \

View File

@ -277,12 +277,20 @@ KOKKOS_INLINE_FUNCTION long long abs(long long n) {
#endif
}
KOKKOS_INLINE_FUNCTION float abs(float x) {
#ifdef KOKKOS_ENABLE_SYCL
return sycl::fabs(x); // sycl::abs is only provided for integral types
#else
using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs;
return abs(x);
#endif
}
KOKKOS_INLINE_FUNCTION double abs(double x) {
#ifdef KOKKOS_ENABLE_SYCL
return sycl::fabs(x); // sycl::abs is only provided for integral types
#else
using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs;
return abs(x);
#endif
}
inline long double abs(long double x) {
using std::abs;

View File

@ -413,12 +413,13 @@ KOKKOS_FORCEINLINE_FUNCTION pair<T1&, T2&> tie(T1& x, T2& y) {
return (pair<T1&, T2&>(x, y));
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
//
// Specialization of Kokkos::pair for a \c void second argument. This
// is not actually a "pair"; it only contains one element, the first.
//
template <class T1>
struct pair<T1, void> {
struct KOKKOS_DEPRECATED pair<T1, void> {
using first_type = T1;
using second_type = void;
@ -448,41 +449,48 @@ struct pair<T1, void> {
// Specialization of relational operators for Kokkos::pair<T1,void>.
//
#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 1110)
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
#endif
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator==(
KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator==(
const pair<T1, void>& lhs, const pair<T1, void>& rhs) {
return lhs.first == rhs.first;
}
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!=(
KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!=(
const pair<T1, void>& lhs, const pair<T1, void>& rhs) {
return !(lhs == rhs);
}
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<(
KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<(
const pair<T1, void>& lhs, const pair<T1, void>& rhs) {
return lhs.first < rhs.first;
}
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<=(
KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<=(
const pair<T1, void>& lhs, const pair<T1, void>& rhs) {
return !(rhs < lhs);
}
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>(
KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>(
const pair<T1, void>& lhs, const pair<T1, void>& rhs) {
return rhs < lhs;
}
template <class T1>
KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>=(
KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>=(
const pair<T1, void>& lhs, const pair<T1, void>& rhs) {
return !(lhs < rhs);
}
#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 1110)
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
#endif
#endif
namespace Impl {
template <class T>

View File

@ -137,9 +137,9 @@ inline void parallel_for(const std::string& str, const ExecPolicy& policy,
ExecPolicy inner_policy = policy;
Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID);
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelFor<FunctorType, ExecPolicy> closure(functor, inner_policy);
Kokkos::Impl::shared_allocation_tracking_enable();
auto closure =
Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
Impl::ParallelFor<FunctorType, ExecPolicy>>(functor, inner_policy);
closure.execute();
@ -352,10 +352,10 @@ inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
ExecutionPolicy inner_policy = policy;
Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelScan<FunctorType, ExecutionPolicy> closure(functor,
inner_policy);
Kokkos::Impl::shared_allocation_tracking_enable();
auto closure =
Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
Impl::ParallelScan<FunctorType, ExecutionPolicy>>(functor,
inner_policy);
closure.execute();
@ -398,18 +398,19 @@ inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
if constexpr (Kokkos::is_view<ReturnType>::value) {
Kokkos::Impl::shared_allocation_tracking_disable();
Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
typename ReturnType::value_type>
closure(functor, inner_policy, return_value);
Kokkos::Impl::shared_allocation_tracking_enable();
auto closure =
Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
typename ReturnType::value_type>>(
functor, inner_policy, return_value);
closure.execute();
} else {
Kokkos::Impl::shared_allocation_tracking_disable();
Kokkos::View<ReturnType, Kokkos::HostSpace> view(&return_value);
Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy, ReturnType>
closure(functor, inner_policy, view);
Kokkos::Impl::shared_allocation_tracking_enable();
auto closure =
Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
ReturnType>>(functor, inner_policy,
view);
closure.execute();
}

View File

@ -72,7 +72,7 @@ struct Sum {
};
template <typename Scalar, typename... Properties>
Sum(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE Sum(View<Scalar, Properties...> const&)
->Sum<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Space>
@ -117,7 +117,7 @@ struct Prod {
};
template <typename Scalar, typename... Properties>
Prod(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE Prod(View<Scalar, Properties...> const&)
->Prod<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Space>
@ -164,7 +164,7 @@ struct Min {
};
template <typename Scalar, typename... Properties>
Min(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE Min(View<Scalar, Properties...> const&)
->Min<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Space>
@ -212,7 +212,7 @@ struct Max {
};
template <typename Scalar, typename... Properties>
Max(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE Max(View<Scalar, Properties...> const&)
->Max<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Space>
@ -258,7 +258,7 @@ struct LAnd {
};
template <typename Scalar, typename... Properties>
LAnd(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE LAnd(View<Scalar, Properties...> const&)
->LAnd<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Space>
@ -305,7 +305,7 @@ struct LOr {
};
template <typename Scalar, typename... Properties>
LOr(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE LOr(View<Scalar, Properties...> const&)
->LOr<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Space>
@ -352,7 +352,7 @@ struct BAnd {
};
template <typename Scalar, typename... Properties>
BAnd(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE BAnd(View<Scalar, Properties...> const&)
->BAnd<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Space>
@ -399,7 +399,7 @@ struct BOr {
};
template <typename Scalar, typename... Properties>
BOr(View<Scalar, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE BOr(View<Scalar, Properties...> const&)
->BOr<Scalar, typename View<Scalar, Properties...>::memory_space>;
template <class Scalar, class Index>
@ -458,7 +458,8 @@ struct MinLoc {
};
template <typename Scalar, typename Index, typename... Properties>
MinLoc(View<ValLocScalar<Scalar, Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE MinLoc(
View<ValLocScalar<Scalar, Index>, Properties...> const&)
->MinLoc<Scalar, Index,
typename View<ValLocScalar<Scalar, Index>,
Properties...>::memory_space>;
@ -513,7 +514,8 @@ struct MaxLoc {
};
template <typename Scalar, typename Index, typename... Properties>
MaxLoc(View<ValLocScalar<Scalar, Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE MaxLoc(
View<ValLocScalar<Scalar, Index>, Properties...> const&)
->MaxLoc<Scalar, Index,
typename View<ValLocScalar<Scalar, Index>,
Properties...>::memory_space>;
@ -577,7 +579,7 @@ struct MinMax {
};
template <typename Scalar, typename... Properties>
MinMax(View<MinMaxScalar<Scalar>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE MinMax(View<MinMaxScalar<Scalar>, Properties...> const&)
->MinMax<Scalar,
typename View<MinMaxScalar<Scalar>, Properties...>::memory_space>;
@ -646,7 +648,8 @@ struct MinMaxLoc {
};
template <typename Scalar, typename Index, typename... Properties>
MinMaxLoc(View<MinMaxLocScalar<Scalar, Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE MinMaxLoc(
View<MinMaxLocScalar<Scalar, Index>, Properties...> const&)
->MinMaxLoc<Scalar, Index,
typename View<MinMaxLocScalar<Scalar, Index>,
Properties...>::memory_space>;
@ -713,7 +716,8 @@ struct MaxFirstLoc {
};
template <typename Scalar, typename Index, typename... Properties>
MaxFirstLoc(View<ValLocScalar<Scalar, Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE MaxFirstLoc(
View<ValLocScalar<Scalar, Index>, Properties...> const&)
->MaxFirstLoc<Scalar, Index,
typename View<ValLocScalar<Scalar, Index>,
Properties...>::memory_space>;
@ -782,7 +786,7 @@ struct MaxFirstLocCustomComparator {
template <typename Scalar, typename Index, typename ComparatorType,
typename... Properties>
MaxFirstLocCustomComparator(
KOKKOS_DEDUCTION_GUIDE MaxFirstLocCustomComparator(
View<ValLocScalar<Scalar, Index>, Properties...> const&, ComparatorType)
->MaxFirstLocCustomComparator<Scalar, Index, ComparatorType,
typename View<ValLocScalar<Scalar, Index>,
@ -846,7 +850,8 @@ struct MinFirstLoc {
};
template <typename Scalar, typename Index, typename... Properties>
MinFirstLoc(View<ValLocScalar<Scalar, Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE MinFirstLoc(
View<ValLocScalar<Scalar, Index>, Properties...> const&)
->MinFirstLoc<Scalar, Index,
typename View<ValLocScalar<Scalar, Index>,
Properties...>::memory_space>;
@ -915,7 +920,7 @@ struct MinFirstLocCustomComparator {
template <typename Scalar, typename Index, typename ComparatorType,
typename... Properties>
MinFirstLocCustomComparator(
KOKKOS_DEDUCTION_GUIDE MinFirstLocCustomComparator(
View<ValLocScalar<Scalar, Index>, Properties...> const&, ComparatorType)
->MinFirstLocCustomComparator<Scalar, Index, ComparatorType,
typename View<ValLocScalar<Scalar, Index>,
@ -990,7 +995,8 @@ struct MinMaxFirstLastLoc {
};
template <typename Scalar, typename Index, typename... Properties>
MinMaxFirstLastLoc(View<MinMaxLocScalar<Scalar, Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE MinMaxFirstLastLoc(
View<MinMaxLocScalar<Scalar, Index>, Properties...> const&)
->MinMaxFirstLastLoc<Scalar, Index,
typename View<MinMaxLocScalar<Scalar, Index>,
Properties...>::memory_space>;
@ -1069,7 +1075,7 @@ struct MinMaxFirstLastLocCustomComparator {
template <typename Scalar, typename Index, typename ComparatorType,
typename... Properties>
MinMaxFirstLastLocCustomComparator(
KOKKOS_DEDUCTION_GUIDE MinMaxFirstLastLocCustomComparator(
View<MinMaxLocScalar<Scalar, Index>, Properties...> const&, ComparatorType)
->MinMaxFirstLastLocCustomComparator<
Scalar, Index, ComparatorType,
@ -1133,7 +1139,8 @@ struct FirstLoc {
};
template <typename Index, typename... Properties>
FirstLoc(View<FirstLocScalar<Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE FirstLoc(
View<FirstLocScalar<Index>, Properties...> const&)
->FirstLoc<Index, typename View<FirstLocScalar<Index>,
Properties...>::memory_space>;
@ -1194,7 +1201,7 @@ struct LastLoc {
};
template <typename Index, typename... Properties>
LastLoc(View<LastLocScalar<Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE LastLoc(View<LastLocScalar<Index>, Properties...> const&)
->LastLoc<Index,
typename View<LastLocScalar<Index>, Properties...>::memory_space>;
@ -1261,7 +1268,8 @@ struct StdIsPartitioned {
};
template <typename Index, typename... Properties>
StdIsPartitioned(View<StdIsPartScalar<Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE StdIsPartitioned(
View<StdIsPartScalar<Index>, Properties...> const&)
->StdIsPartitioned<Index, typename View<StdIsPartScalar<Index>,
Properties...>::memory_space>;
@ -1323,7 +1331,8 @@ struct StdPartitionPoint {
};
template <typename Index, typename... Properties>
StdPartitionPoint(View<StdPartPointScalar<Index>, Properties...> const&)
KOKKOS_DEDUCTION_GUIDE StdPartitionPoint(
View<StdPartPointScalar<Index>, Properties...> const&)
->StdPartitionPoint<Index, typename View<StdPartPointScalar<Index>,
Properties...>::memory_space>;
@ -1502,18 +1511,18 @@ struct ParallelReduceAdaptor {
using Analysis = FunctorAnalysis<FunctorPatternInterface::REDUCE,
PolicyType, typename ReducerSelector::type,
typename return_value_adapter::value_type>;
Kokkos::Impl::shared_allocation_tracking_disable();
CombinedFunctorReducer functor_reducer(
functor, typename Analysis::Reducer(
ReducerSelector::select(functor, return_value)));
// FIXME Remove "Wrapper" once all backends implement the new interface
Impl::ParallelReduce<decltype(functor_reducer), PolicyType,
typename Impl::FunctorPolicyExecutionSpace<
FunctorType, PolicyType>::execution_space>
closure(functor_reducer, inner_policy,
return_value_adapter::return_value(return_value, functor));
Kokkos::Impl::shared_allocation_tracking_enable();
using CombinedFunctorReducerType =
CombinedFunctorReducer<FunctorType, typename Analysis::Reducer>;
auto closure = construct_with_shared_allocation_tracking_disabled<
Impl::ParallelReduce<CombinedFunctorReducerType, PolicyType,
typename Impl::FunctorPolicyExecutionSpace<
FunctorType, PolicyType>::execution_space>>(
CombinedFunctorReducerType(
functor, typename Analysis::Reducer(
ReducerSelector::select(functor, return_value))),
inner_policy,
return_value_adapter::return_value(return_value, functor));
closure.execute();
Kokkos::Tools::Impl::end_parallel_reduce<PassedReducerType>(

View File

@ -38,6 +38,8 @@ static_assert(false,
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
#include <View/MDSpan/Kokkos_MDSpan_Extents.hpp>
#include <View/MDSpan/Kokkos_MDSpan_Layout.hpp>
#include <View/MDSpan/Kokkos_MDSpan_Accessor.hpp>
#endif
#include <Kokkos_MinMax.hpp>
@ -372,6 +374,35 @@ struct ViewTraits {
//------------------------------------
};
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
namespace Impl {
struct UnsupportedKokkosArrayLayout;
template <class Traits, class Enabled = void>
struct MDSpanViewTraits {
using mdspan_type = UnsupportedKokkosArrayLayout;
};
// "Natural" mdspan for a view if the View's ArrayLayout is supported.
template <class Traits>
struct MDSpanViewTraits<Traits,
std::void_t<typename Impl::LayoutFromArrayLayout<
typename Traits::array_layout>::type>> {
using index_type = std::size_t;
using extents_type =
typename Impl::ExtentsFromDataType<index_type,
typename Traits::data_type>::type;
using mdspan_layout_type =
typename Impl::LayoutFromArrayLayout<typename Traits::array_layout>::type;
using accessor_type = Impl::SpaceAwareAccessor<
typename Traits::memory_space,
Kokkos::default_accessor<typename Traits::value_type>>;
using mdspan_type = mdspan<typename Traits::value_type, extents_type,
mdspan_layout_type, accessor_type>;
};
} // namespace Impl
#endif // KOKKOS_ENABLE_IMPL_MDSPAN
/** \class View
* \brief View to an array of data.
*
@ -522,7 +553,6 @@ constexpr bool is_assignable(const Kokkos::View<ViewTDst...>& dst,
//----------------------------------------------------------------------------
#include <impl/Kokkos_ViewMapping.hpp>
#include <impl/Kokkos_ViewArray.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -541,6 +571,8 @@ inline constexpr Kokkos::ALL_t ALL{};
#pragma omp end declare target
#endif
inline constexpr Kokkos::Impl::SequentialHostInit_t SequentialHostInit{};
inline constexpr Kokkos::Impl::WithoutInitializing_t WithoutInitializing{};
inline constexpr Kokkos::Impl::AllowPadding_t AllowPadding{};
@ -923,57 +955,30 @@ class View : public ViewTraits<DataType, Properties...> {
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1>::value && //
(2 == rank) && is_default_map && is_layout_left && (rank_dynamic == 0)),
(2 == rank) && is_default_map &&
(is_layout_left || is_layout_right || is_layout_stride)),
reference_type>
operator()(I0 i0, I1 i1) const {
check_operator_parens_valid_args(i0, i1);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1)
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1>::value && //
(2 == rank) && is_default_map && is_layout_left && (rank_dynamic != 0)),
reference_type>
operator()(I0 i0, I1 i1) const {
check_operator_parens_valid_args(i0, i1);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1)
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1>::value && //
(2 == rank) && is_default_map && is_layout_right && (rank_dynamic == 0)),
reference_type>
operator()(I0 i0, I1 i1) const {
check_operator_parens_valid_args(i0, i1);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1)
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1>::value && //
(2 == rank) && is_default_map && is_layout_right && (rank_dynamic != 0)),
reference_type>
operator()(I0 i0, I1 i1) const {
check_operator_parens_valid_args(i0, i1);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1)
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0];
}
template <typename I0, typename I1>
KOKKOS_FORCEINLINE_FUNCTION
std::enable_if_t<(Kokkos::Impl::always_true<I0, I1>::value && //
(2 == rank) && is_default_map && is_layout_stride),
reference_type>
operator()(I0 i0, I1 i1) const {
check_operator_parens_valid_args(i0, i1);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1)
return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 +
i1 * m_map.m_impl_offset.m_stride.S1];
if constexpr (is_layout_left) {
if constexpr (rank_dynamic == 0)
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1];
else
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1];
} else if constexpr (is_layout_right) {
if constexpr (rank_dynamic == 0)
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0];
else
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0];
} else {
static_assert(is_layout_stride);
return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 +
i1 * m_map.m_impl_offset.m_stride.S1];
}
#if defined KOKKOS_COMPILER_INTEL
__builtin_unreachable();
#endif
}
// Rank 0 -> 8 operator() except for rank-1 and rank-2 with default map which
@ -1066,57 +1071,30 @@ class View : public ViewTraits<DataType, Properties...> {
template <typename I0, typename I1, typename... Is>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == rank) &&
is_default_map && is_layout_left && (rank_dynamic == 0)),
is_default_map &&
(is_layout_left || is_layout_right || is_layout_stride)),
reference_type>
access(I0 i0, I1 i1, Is... extra) const {
check_access_member_function_valid_args(i0, i1, extra...);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...)
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1];
}
template <typename I0, typename I1, typename... Is>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == rank) &&
is_default_map && is_layout_left && (rank_dynamic != 0)),
reference_type>
access(I0 i0, I1 i1, Is... extra) const {
check_access_member_function_valid_args(i0, i1, extra...);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...)
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1];
}
template <typename I0, typename I1, typename... Is>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == rank) &&
is_default_map && is_layout_right && (rank_dynamic == 0)),
reference_type>
access(I0 i0, I1 i1, Is... extra) const {
check_access_member_function_valid_args(i0, i1, extra...);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...)
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0];
}
template <typename I0, typename I1, typename... Is>
KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t<
(Kokkos::Impl::always_true<I0, I1, Is...>::value && (2 == rank) &&
is_default_map && is_layout_right && (rank_dynamic != 0)),
reference_type>
access(I0 i0, I1 i1, Is... extra) const {
check_access_member_function_valid_args(i0, i1, extra...);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...)
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0];
}
template <typename I0, typename I1, typename... Is>
KOKKOS_FORCEINLINE_FUNCTION
std::enable_if_t<(Kokkos::Impl::always_true<I0, I1, Is...>::value &&
(2 == rank) && is_default_map && is_layout_stride),
reference_type>
access(I0 i0, I1 i1, Is... extra) const {
check_access_member_function_valid_args(i0, i1, extra...);
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...)
return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 +
i1 * m_map.m_impl_offset.m_stride.S1];
if constexpr (is_layout_left) {
if constexpr (rank_dynamic == 0)
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1];
else
return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1];
} else if constexpr (is_layout_right) {
if constexpr (rank_dynamic == 0)
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0];
else
return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0];
} else {
static_assert(is_layout_stride);
return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 +
i1 * m_map.m_impl_offset.m_stride.S1];
}
#if defined KOKKOS_COMPILER_INTEL
__builtin_unreachable();
#endif
}
//------------------------------
@ -1442,8 +1420,7 @@ class View : public ViewTraits<DataType, Properties...> {
std::is_same_v<typename traits::array_layout,
Kokkos::LayoutRight> ||
std::is_same_v<typename traits::array_layout,
Kokkos::LayoutStride> ||
is_layouttiled<typename traits::array_layout>::value) {
Kokkos::LayoutStride>) {
size_t i0 = arg_layout.dimension[0];
size_t i1 = arg_layout.dimension[1];
size_t i2 = arg_layout.dimension[2];
@ -1495,8 +1472,7 @@ class View : public ViewTraits<DataType, Properties...> {
std::is_same_v<typename traits::array_layout,
Kokkos::LayoutRight> ||
std::is_same_v<typename traits::array_layout,
Kokkos::LayoutStride> ||
is_layouttiled<typename traits::array_layout>::value) {
Kokkos::LayoutStride>) {
size_t i0 = arg_layout.dimension[0];
size_t i1 = arg_layout.dimension[1];
size_t i2 = arg_layout.dimension[2];
@ -1725,6 +1701,79 @@ class View : public ViewTraits<DataType, Properties...> {
"Layout is not constructible from extent arguments. Use "
"overload taking a layout object instead.");
}
//----------------------------------------
// MDSpan converting constructors
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
template <typename U = typename Impl::MDSpanViewTraits<traits>::mdspan_type>
KOKKOS_INLINE_FUNCTION
#ifndef KOKKOS_ENABLE_CXX17
explicit(traits::is_managed)
#endif
View(const typename Impl::MDSpanViewTraits<traits>::mdspan_type& mds,
std::enable_if_t<
!std::is_same_v<Impl::UnsupportedKokkosArrayLayout, U>>* =
nullptr)
: View(mds.data_handle(),
Impl::array_layout_from_mapping<
typename traits::array_layout,
typename Impl::MDSpanViewTraits<traits>::mdspan_type>(
mds.mapping())) {
}
template <class ElementType, class ExtentsType, class LayoutType,
class AccessorType>
KOKKOS_INLINE_FUNCTION
#ifndef KOKKOS_ENABLE_CXX17
explicit(!std::is_convertible_v<
Kokkos::mdspan<ElementType, ExtentsType, LayoutType,
AccessorType>,
typename Impl::MDSpanViewTraits<traits>::mdspan_type>)
#endif
View(const Kokkos::mdspan<ElementType, ExtentsType, LayoutType,
AccessorType>& mds)
: View(typename Impl::MDSpanViewTraits<traits>::mdspan_type(mds)) {
}
//----------------------------------------
// Conversion to MDSpan
template <class OtherElementType, class OtherExtents, class OtherLayoutPolicy,
class OtherAccessor,
class ImplNaturalMDSpanType =
typename Impl::MDSpanViewTraits<traits>::mdspan_type,
typename = std::enable_if_t<std::conditional_t<
std::is_same_v<Impl::UnsupportedKokkosArrayLayout,
ImplNaturalMDSpanType>,
std::false_type,
std::is_assignable<mdspan<OtherElementType, OtherExtents,
OtherLayoutPolicy, OtherAccessor>,
ImplNaturalMDSpanType>>::value>>
KOKKOS_INLINE_FUNCTION constexpr operator mdspan<
OtherElementType, OtherExtents, OtherLayoutPolicy, OtherAccessor>() {
using mdspan_type = typename Impl::MDSpanViewTraits<traits>::mdspan_type;
return mdspan_type{data(),
Impl::mapping_from_view_mapping<mdspan_type>(m_map)};
}
template <class OtherAccessorType = Impl::SpaceAwareAccessor<
typename traits::memory_space,
Kokkos::default_accessor<typename traits::value_type>>,
typename = std::enable_if_t<std::is_assignable_v<
typename traits::value_type*&,
typename OtherAccessorType::data_handle_type>>>
KOKKOS_INLINE_FUNCTION constexpr auto to_mdspan(
const OtherAccessorType& other_accessor =
typename Impl::MDSpanViewTraits<traits>::accessor_type()) {
using mdspan_type = typename Impl::MDSpanViewTraits<traits>::mdspan_type;
using ret_mdspan_type =
mdspan<typename mdspan_type::element_type,
typename mdspan_type::extents_type,
typename mdspan_type::layout_type, OtherAccessorType>;
return ret_mdspan_type{data(),
Impl::mapping_from_view_mapping<mdspan_type>(m_map),
other_accessor};
}
#endif // KOKKOS_ENABLE_IMPL_MDSPAN
};
template <typename D, class... P>
@ -1878,23 +1927,6 @@ KOKKOS_INLINE_FUNCTION bool operator!=(const View<LT, LP...>& lhs,
namespace Kokkos {
namespace Impl {
inline void shared_allocation_tracking_disable() {
Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_disable();
}
inline void shared_allocation_tracking_enable() {
Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enable();
}
} /* namespace Impl */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template <class Specialize, typename A, typename B>
struct CommonViewValueType;

View File

@ -67,16 +67,7 @@ void *Kokkos::Experimental::OpenACCSpace::impl_allocate(
ptr = acc_malloc(arg_alloc_size);
if (!ptr) {
size_t alignment = 1; // OpenACC does not handle alignment
using Kokkos::Experimental::RawMemoryAllocationFailure;
auto failure_mode =
arg_alloc_size > 0
? RawMemoryAllocationFailure::FailureMode::OutOfMemoryError
: RawMemoryAllocationFailure::FailureMode::InvalidAllocationSize;
auto alloc_mechanism =
RawMemoryAllocationFailure::AllocationMechanism::OpenACCMalloc;
throw RawMemoryAllocationFailure(arg_alloc_size, alignment, failure_mode,
alloc_mechanism);
Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label);
}
if (Kokkos::Profiling::profileLibraryLoaded()) {

View File

@ -44,10 +44,12 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
auto team_size = m_policy.team_size();
auto vector_length = m_policy.impl_vector_length();
int const async_arg = m_policy.space().acc_async_queue();
auto const a_functor(m_functor);
#pragma acc parallel loop gang vector num_gangs(league_size) \
vector_length(team_size* vector_length) copyin(a_functor)
vector_length(team_size* vector_length) copyin(a_functor) async(async_arg)
for (int i = 0; i < league_size * team_size * vector_length; i++) {
int league_id = i / (team_size * vector_length);
typename Policy::member_type team(league_id, league_size, team_size,
@ -145,10 +147,12 @@ class Kokkos::Impl::ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
auto team_size = m_policy.team_size();
auto vector_length = m_policy.impl_vector_length();
int const async_arg = m_policy.space().acc_async_queue();
auto const a_functor(m_functor);
#pragma acc parallel loop gang num_gangs(league_size) num_workers(team_size) \
vector_length(vector_length) copyin(a_functor)
vector_length(vector_length) copyin(a_functor) async(async_arg)
for (int i = 0; i < league_size; i++) {
int league_id = i;
typename Policy::member_type team(league_id, league_size, team_size,

View File

@ -72,9 +72,28 @@ int OpenMP::concurrency(OpenMP const &instance) {
int OpenMP::concurrency() const { return impl_thread_pool_size(); }
#endif
void OpenMP::impl_static_fence(std::string const &name) {
Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>(
name,
Kokkos::Tools::Experimental::SpecialSynchronizationCases::
GlobalDeviceSynchronization,
[]() {
std::lock_guard<std::mutex> lock_all_instances(
Impl::OpenMPInternal::all_instances_mutex);
for (auto *instance_ptr : Impl::OpenMPInternal::all_instances) {
std::lock_guard<std::mutex> lock_instance(
instance_ptr->m_instance_mutex);
}
});
}
void OpenMP::fence(const std::string &name) const {
Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>(
name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {});
name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1},
[this]() {
auto *internal_instance = this->impl_internal_space_instance();
std::lock_guard<std::mutex> lock(internal_instance->m_instance_mutex);
});
}
bool OpenMP::impl_is_initialized() noexcept {
@ -94,7 +113,7 @@ int OpenMP::impl_thread_pool_size() const noexcept {
}
int OpenMP::impl_max_hardware_threads() noexcept {
return Impl::g_openmp_hardware_max_threads;
return Impl::OpenMPInternal::max_hardware_threads();
}
namespace Impl {

View File

@ -67,7 +67,15 @@ class OpenMP {
OpenMP();
OpenMP(int pool_size);
explicit OpenMP(int pool_size);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <typename T = void>
KOKKOS_DEPRECATED_WITH_COMMENT(
"OpenMP execution space should be constructed explicitly.")
OpenMP(int pool_size)
: OpenMP(pool_size) {}
#endif
/// \brief Print configuration information to the given output stream.
void print_configuration(std::ostream& os, bool verbose = false) const;
@ -146,14 +154,6 @@ inline int OpenMP::impl_thread_pool_rank() noexcept {
KOKKOS_IF_ON_DEVICE((return -1;))
}
inline void OpenMP::impl_static_fence(std::string const& name) {
Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>(
name,
Kokkos::Tools::Experimental::SpecialSynchronizationCases::
GlobalDeviceSynchronization,
[]() {});
}
inline bool OpenMP::is_asynchronous(OpenMP const& /*instance*/) noexcept {
return false;
}

View File

@ -31,20 +31,18 @@
#include <sstream>
#include <thread>
namespace {
int g_openmp_hardware_max_threads = 1;
}
namespace Kokkos {
namespace Impl {
void OpenMPInternal::acquire_lock() {
while (1 == desul::atomic_compare_exchange(&m_pool_mutex, 0, 1,
desul::MemoryOrderAcquire(),
desul::MemoryScopeDevice())) {
// do nothing
}
}
std::vector<OpenMPInternal *> OpenMPInternal::all_instances;
std::mutex OpenMPInternal::all_instances_mutex;
void OpenMPInternal::release_lock() {
desul::atomic_store(&m_pool_mutex, 0, desul::MemoryOrderRelease(),
desul::MemoryScopeDevice());
int OpenMPInternal::max_hardware_threads() noexcept {
return g_openmp_hardware_max_threads;
}
void OpenMPInternal::clear_thread_data() {
@ -123,17 +121,11 @@ void OpenMPInternal::resize_thread_data(size_t pool_reduce_bytes,
if (nullptr != m_pool[rank]) {
m_pool[rank]->disband_pool();
space.deallocate(m_pool[rank], old_alloc_bytes);
// impl_deallocate to not fence here
space.impl_deallocate("[unlabeled]", m_pool[rank], old_alloc_bytes);
}
void *ptr = nullptr;
try {
ptr = space.allocate(alloc_bytes);
} catch (
Kokkos::Experimental::RawMemoryAllocationFailure const &failure) {
// For now, just rethrow the error message the existing way
Kokkos::Impl::throw_runtime_exception(failure.get_error_message());
}
void *ptr = space.allocate("Kokkos::OpenMP::scratch_mem", alloc_bytes);
m_pool[rank] = new (ptr) HostThreadTeamData();
@ -204,9 +196,9 @@ void OpenMPInternal::initialize(int thread_count) {
// Before any other call to OMP query the maximum number of threads
// and save the value for re-initialization unit testing.
Impl::g_openmp_hardware_max_threads = get_current_max_threads();
g_openmp_hardware_max_threads = get_current_max_threads();
int process_num_threads = Impl::g_openmp_hardware_max_threads;
int process_num_threads = g_openmp_hardware_max_threads;
if (Kokkos::hwloc::available()) {
process_num_threads = Kokkos::hwloc::get_available_numa_count() *
@ -219,11 +211,11 @@ void OpenMPInternal::initialize(int thread_count) {
// process_num_threads if thread_count > 0, set
// g_openmp_hardware_max_threads to thread_count
if (thread_count < 0) {
thread_count = Impl::g_openmp_hardware_max_threads;
thread_count = g_openmp_hardware_max_threads;
} else if (thread_count == 0) {
if (Impl::g_openmp_hardware_max_threads != process_num_threads) {
Impl::g_openmp_hardware_max_threads = process_num_threads;
omp_set_num_threads(Impl::g_openmp_hardware_max_threads);
if (g_openmp_hardware_max_threads != process_num_threads) {
g_openmp_hardware_max_threads = process_num_threads;
omp_set_num_threads(g_openmp_hardware_max_threads);
}
} else {
if (Kokkos::show_warnings() && thread_count > process_num_threads) {
@ -234,16 +226,16 @@ void OpenMPInternal::initialize(int thread_count) {
<< ", requested thread : " << std::setw(3) << thread_count
<< std::endl;
}
Impl::g_openmp_hardware_max_threads = thread_count;
omp_set_num_threads(Impl::g_openmp_hardware_max_threads);
g_openmp_hardware_max_threads = thread_count;
omp_set_num_threads(g_openmp_hardware_max_threads);
}
// setup thread local
#pragma omp parallel num_threads(Impl::g_openmp_hardware_max_threads)
#pragma omp parallel num_threads(g_openmp_hardware_max_threads)
{ Impl::SharedAllocationRecord<void, void>::tracking_enable(); }
auto &instance = OpenMPInternal::singleton();
instance.m_pool_size = Impl::g_openmp_hardware_max_threads;
instance.m_pool_size = g_openmp_hardware_max_threads;
// New, unified host thread team data:
{
@ -288,10 +280,9 @@ void OpenMPInternal::finalize() {
if (this == &singleton()) {
auto const &instance = singleton();
// Silence Cuda Warning
const int nthreads =
instance.m_pool_size <= Impl::g_openmp_hardware_max_threads
? Impl::g_openmp_hardware_max_threads
: instance.m_pool_size;
const int nthreads = instance.m_pool_size <= g_openmp_hardware_max_threads
? g_openmp_hardware_max_threads
: instance.m_pool_size;
(void)nthreads;
#pragma omp parallel num_threads(nthreads)
@ -300,10 +291,22 @@ void OpenMPInternal::finalize() {
// allow main thread to track
Impl::SharedAllocationRecord<void, void>::tracking_enable();
Impl::g_openmp_hardware_max_threads = 1;
g_openmp_hardware_max_threads = 1;
}
m_initialized = false;
// guard erasing from all_instances
{
std::scoped_lock lock(all_instances_mutex);
auto it = std::find(all_instances.begin(), all_instances.end(), this);
if (it == all_instances.end())
Kokkos::abort(
"Execution space instance to be removed couldn't be found!");
*it = all_instances.back();
all_instances.pop_back();
}
}
void OpenMPInternal::print_configuration(std::ostream &s) const {
@ -311,7 +314,7 @@ void OpenMPInternal::print_configuration(std::ostream &s) const {
if (m_initialized) {
const int numa_count = 1;
const int core_per_numa = Impl::g_openmp_hardware_max_threads;
const int core_per_numa = g_openmp_hardware_max_threads;
const int thread_per_core = 1;
s << " thread_pool_topology[ " << numa_count << " x " << core_per_numa

Some files were not shown because too many files have changed in this diff Show More