Merge branch 'develop' into general-triclinic

This commit is contained in:
Steve Plimpton
2024-04-05 15:50:55 -06:00
398 changed files with 13332 additions and 12513 deletions

View File

@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "16b9b09ae947d434dfb58fc5c87c2b76" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "889dcea2b5ced3debdc5b0820044bdc4" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.2.01 REQUIRED CONFIG)
find_package(Kokkos 4.3.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -533,9 +533,6 @@ They must be specified in uppercase.
* - A64FX
- HOST
- ARMv8.2 with SVE Support
* - WSM
- HOST
- Intel Westmere CPU (SSE 4.2)
* - SNB
- HOST
- Intel Sandy/Ivy Bridge CPU (AVX 1)
@ -566,18 +563,15 @@ They must be specified in uppercase.
* - KNL
- HOST
- Intel Knights Landing Xeon Phi
* - BGQ
- HOST
- IBM Blue Gene/Q CPU
* - POWER7
- HOST
- IBM POWER7 CPU
* - POWER8
- HOST
- IBM POWER8 CPU
* - POWER9
- HOST
- IBM POWER9 CPU
* - RISCV_SG2042
- HOST
- SG2042 (RISC-V) CPU
* - KEPLER30
- GPU
- NVIDIA Kepler generation CC 3.0 GPU
@ -666,7 +660,7 @@ They must be specified in uppercase.
- GPU
- Intel GPU Ponte Vecchio
This list was last updated for version 4.2 of the Kokkos library.
This list was last updated for version 4.3.0 of the Kokkos library.
.. tabs::

View File

@ -1,5 +1,105 @@
# CHANGELOG
## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00)
### Features:
* Add `Experimental::sort_by_key(exec, keys, values)` algorithm [\#6801](https://github.com/kokkos/kokkos/pull/6801)
### Backend and Architecture Enhancements:
#### CUDA:
* Experimental multi-GPU support (from the same process) [\#6782](https://github.com/kokkos/kokkos/pull/6782)
* Link against CUDA libraries even with KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE [\#6701](https://github.com/kokkos/kokkos/pull/6701)
* Don't use the compiler launcher script if the CMake compile language is CUDA. [\#6704](https://github.com/kokkos/kokkos/pull/6704)
* nvcc(wrapper): adding "long" and "short" versions for all flags [\#6615](https://github.com/kokkos/kokkos/pull/6615)
#### HIP:
* Fix compilation when using amdclang (with ROCm >= 5.7) and RDC [\#6857](https://github.com/kokkos/kokkos/pull/6857)
* Use rocthrust for sorting, when available [\#6793](https://github.com/kokkos/kokkos/pull/6793)
#### SYCL:
* Only the oneAPI SYCL implementation is supported: add a check during initialization
* Error out on initialization if the backend is different from `ext_oneapi_*` [\#6784](https://github.com/kokkos/kokkos/pull/6784)
* Filter GPU devices for `ext_oneapi_*` GPU devices [\#6758](https://github.com/kokkos/kokkos/pull/6758)
* Performance Improvements
* Avoid unnecessary zero-memset of the scratch flags in SYCL [\#6739](https://github.com/kokkos/kokkos/pull/6739)
* Use host-pinned memory to copy reduction/scan result [\#6500](https://github.com/kokkos/kokkos/pull/6500)
* Address deprecations after oneAPI 2023.2.0 [\#6577](https://github.com/kokkos/kokkos/pull/6577)
* Make sure to call find_dependency for oneDPL if necessary [\#6870](https://github.com/kokkos/kokkos/pull/6870)
#### OpenMPTarget:
* Use LLVM extensions for dynamic shared memory [\#6380](https://github.com/kokkos/kokkos/pull/6380)
* Guard scratch memory usage in ParallelReduce [\#6585](https://github.com/kokkos/kokkos/pull/6585)
* Update linker flags for Intel GPUs [\#6735](https://github.com/kokkos/kokkos/pull/6735)
* Improve handling of printf on Intel GPUs [\#6652](https://github.com/kokkos/kokkos/pull/6652)
#### OpenACC:
* Add atomics support [\#6446](https://github.com/kokkos/kokkos/pull/6446)
* Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772)
#### Threads:
* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601)
#### OpenMP:
* Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573)
### General Enhancements
* Improve performance of random number generation when using a normal distribution on GPUs [\#6556](https://github.com/kokkos/kokkos/pull/6556)
* Allocate temporary view with the user-provided execution space instance and do not initialize in `unique` algorithm [\#6598](https://github.com/kokkos/kokkos/pull/6598)
* Add deduction guide for `Kokkos::Array` [\#6373](https://github.com/kokkos/kokkos/pull/6373)
* Provide new public headers `<Kokkos_Clamp.hpp>` and `<Kokkos_MinMax.hpp>` [\#6687](https://github.com/kokkos/kokkos/pull/6687)
* Fix/improvement to `remove_if` parallel algorithm: use the provided execution space instance for temporary allocations and drop unnecessary initialization + avoid evaluating the predicate twice during final pass [\#6747](https://github.com/kokkos/kokkos/pull/6747)
* Add runtime function to query the number of devices and make device ID consistent with `KOKKOS_VISIBLE_DEVICES` [\#6713](https://github.com/kokkos/kokkos/pull/6713)
* simd: support `vector_aligned_tag` [\#6243](https://github.com/kokkos/kokkos/pull/6243)
* Avoid unnecessary allocation when default constructing Bitset [\#6524](https://github.com/kokkos/kokkos/pull/6524)
* Fix constness for views in std algorithms [\#6813](https://github.com/kokkos/kokkos/pull/6813)
* Improve error message on unsafe implicit conversion in MDRangePolicy [\#6855](https://github.com/kokkos/kokkos/pull/6855)
* CTAD (deduction guides) for RangePolicy [\#6850](https://github.com/kokkos/kokkos/pull/6850)
* CTAD (deduction guides) for MDRangePolicy [\#5516](https://github.com/kokkos/kokkos/pull/5516)
### Build System Changes
* Require `Kokkos_ENABLE_ATOMICS_BYPASS` option to bypass atomic operation for Serial backend only builds [\#6692](https://github.com/kokkos/kokkos/pull/6692)
* Add support for RISCV and the Milk-V's Pioneer [\#6773](https://github.com/kokkos/kokkos/pull/6773)
* Add C++26 standard to CMake setup [\#6733](https://github.com/kokkos/kokkos/pull/6733)
* Fix Makefile when using gnu_generate_makefile.sh and make >= 4.3 [\#6606](https://github.com/kokkos/kokkos/pull/6606)
* Cuda: Fix configuring with CMake >= 3.28.4 - temporary fallback to internal CudaToolkit.cmake [\#6898](https://github.com/kokkos/kokkos/pull/6898)
### Incompatibilities (i.e. breaking changes)
* Remove the `DEPRECATED_CODE_3` option and all code that was guarded by it [\#6523](https://github.com/kokkos/kokkos/pull/6523)
* Drop guards to accommodate external code defining `KOKKOS_ASSERT` [\#6665](https://github.com/kokkos/kokkos/pull/6665)
* `Profiling::ProfilingSection(std::string)` constructor marked explicit and nodiscard [\#6690](https://github.com/kokkos/kokkos/pull/6690)
* Add bound check preconditions for `RangePolicy` and `MDRangePolicy` [\#6617](https://github.com/kokkos/kokkos/pull/6617) [\#6726](https://github.com/kokkos/kokkos/pull/6726)
* Add checks for unsafe implicit conversions in RangePolicy [\#6754](https://github.com/kokkos/kokkos/pull/6754)
* Remove Kokkos::[b]half_t volatile overloads [\#6579](https://github.com/kokkos/kokkos/pull/6579)
* Remove KOKKOS_IMPL_DO_NOT_USE_PRINTF [\#6593](https://github.com/kokkos/kokkos/pull/6593)
* Check matching static extents in View constructor [\#5190](https://github.com/kokkos/kokkos/pull/5190)
* Tools(profiling): fix typo Kokkos_Tools_Optim[i]zationGoal [\#6642](https://github.com/kokkos/kokkos/pull/6642)
* Remove variadic range policy constructor (disallow passing multiple trailing chunk size arguments) [\#6845](https://github.com/kokkos/kokkos/pull/6845)
* Improve message on view out of bounds access and always abort [\#6861](https://github.com/kokkos/kokkos/pull/6861)
* Drop `KOKKOS_ENABLE_INTEL_MM_ALLOC` macro [\#6797](https://github.com/kokkos/kokkos/pull/6797)
* Remove `Kokkos::Experimental::LogicalMemorySpace` (without going through deprecation) [\#6557](https://github.com/kokkos/kokkos/pull/6557)
* Remove `Experimental::HBWSpace` and support for linking against memkind [\#6791](https://github.com/kokkos/kokkos/pull/6791)
* Drop librt TPL and associated `KOKKOS_ENABLE_LIBRT` macro [\#6798](https://github.com/kokkos/kokkos/pull/6798)
* Drop support for old CPU architectures (`ARCH_BGQ`, `ARCH_POWER7`, `ARCH_WSM` and associated `ARCH_SSE4` macro) [\#6806](https://github.com/kokkos/kokkos/pull/6806)
* Drop support for deprecated command-line arguments and environment variables [\#6744](https://github.com/kokkos/kokkos/pull/6744)
### Deprecations
* Provide kokkos_swap as part of Core and deprecate Experimental::swap in Algorithms [\#6697](https://github.com/kokkos/kokkos/pull/6697)
* Deprecate {Cuda,HIP}::detect_device_count() and Cuda::[detect_]device_arch() [\#6710](https://github.com/kokkos/kokkos/pull/6710)
* Deprecate `ExecutionSpace::in_parallel()` [\#6582](https://github.com/kokkos/kokkos/pull/6582)
### Bug Fixes
* Fix team-level MDRange reductions: [\#6511](https://github.com/kokkos/kokkos/pull/6511)
* Fix CUDA and SYCL small value type (16-bit) team reductions [\#5334](https://github.com/kokkos/kokkos/pull/5334)
* Enable `{transform_}exclusive_scan` in place [\#6667](https://github.com/kokkos/kokkos/pull/6667)
* `fill_random` overloads that do not take an execution space instance argument should fence [\#6658](https://github.com/kokkos/kokkos/pull/6658)
* HIP,Cuda,OpenMPTarget: Fixup use provided execution space when copying host inaccessible reduction result [\#6777](https://github.com/kokkos/kokkos/pull/6777)
* Fix typo in `cuda_func_set_attribute[s]_wrapper` preventing proper setting of desired occupancy [\#6786](https://github.com/kokkos/kokkos/pull/6786)
* Avoid undefined behavior due to conversion between signed and unsigned integers in shift_{right, left}_team_impl [\#6821](https://github.com/kokkos/kokkos/pull/6821)
* Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892)
## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01)
@ -999,95 +1099,95 @@
- Major update for OpenMPTarget: many capabilities now work. For details contact us.
- Added DPC++/SYCL backend: primary capabilities are working.
- Added Kokkos Graph API analogous to CUDA Graphs.
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/#3536)
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/#3546)
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/#3439)
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/#3379)
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/3536)
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/3546)
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/3439)
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/3379)
**Implemented enhancements Backends and Archs:**
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/#3614)
- Adding support for AMD gfx908 archictecture [\#3375](https://github.com/kokkos/kokkos/pull/#3375)
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/#3583)
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/#3577)
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/#3544)
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/#3550)
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/#3480)
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/#3474)
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/#3451)
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/#3447)
- OpenMPTarget: Hierarchial reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/#3504)
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/#3411)
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/#3440)
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/#3418)
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/#3366)
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/3614)
- Adding support for AMD gfx908 architecture [\#3375](https://github.com/kokkos/kokkos/pull/3375)
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/3583)
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/3577)
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/3544)
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/3550)
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/3480)
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/3474)
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/3451)
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/3447)
- OpenMPTarget: Hierarchical reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/3504)
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/3411)
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/3440)
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/3418)
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/3366)
**Implemented enhancements Policies:**
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/#3494)
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/#3527)
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/#3395)
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/#3362)
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/#3369)
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/#3206)
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/#3509)
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/3494)
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/3527)
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/3395)
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/3362)
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/3369)
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/3206)
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/3509)
**Implemented enhancements BuildSystem:**
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/#3488)
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/#3548)
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/#3136)
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/#3434)
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/#3402)
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/#3457)
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/3488)
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/3548)
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/3136)
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/3434)
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/3402)
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/3457)
**Implemented enhancements Tools:**
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/#3455)
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/#3530)
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/#3518)
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/#3459)
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/#3326)
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/3455)
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/3530)
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/3518)
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/3459)
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/3326)
**Implemented enhancements Other:**
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/#3528)
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/#3449)
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/#3436)
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/#3435)
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/#3422)
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/#3416)
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/#3388)
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/#3359)
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/#3357)
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/#3340)
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/#3339)
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/#3338)
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/#3309)
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/#3265)
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/#2941)
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/3528)
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/3449)
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/3436)
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/3435)
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/3422)
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/3416)
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/3388)
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/3359)
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/3357)
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/3340)
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/3339)
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/3338)
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/3309)
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/3265)
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/2941)
**Fixed bugs:**
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/#3591)
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/#3588)
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/#3566)
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/#3565)
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/#3532)
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/#3529)
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/#3510)
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/#3503)
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/#3467)
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/#3458)
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/#3398)
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/#3393)
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/#3390)
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/#3378)
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/#3348)
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/#3345)
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/#3343)
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/#3260)
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/3591)
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/3588)
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/3566)
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/3565)
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/3532)
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/3529)
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/3510)
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/3503)
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/3467)
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/3458)
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/3398)
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/3393)
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/3390)
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/3378)
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/3348)
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/3345)
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/3343)
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/3260)
**Incompatibilities:**
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/#3535)
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/#3534)
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/#3301)
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/#3264)
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/#3148)
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/3535)
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/3534)
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/3301)
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/3264)
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/3148)
## [3.2.01](https://github.com/kokkos/kokkos/tree/3.2.01) (2020-11-17)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.2.00...3.2.01)

View File

@ -150,8 +150,8 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 2)
set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION_MINOR 3)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")

View File

@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS)
endif
KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 2
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION_MINOR = 3
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
@ -22,14 +22,14 @@ KOKKOS_DEVICES ?= "OpenMP"
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
# Options: hwloc,librt,experimental_memkind
# Options: hwloc
KOKKOS_USE_TPLS ?= ""
# Options: c++17,c++1z,c++20,c++2a,c++23,c++2b
KOKKOS_CXX_STANDARD ?= "c++17"
@ -56,7 +56,7 @@ uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(
uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT)
# Return a 1 if a string contains a substring and 0 if not
# Note the search string should be without '"'
# Example: $(call kokkos_has_string,"hwloc,librt",hwloc)
# Example: $(call kokkos_has_string,"hwloc,libdl",hwloc)
# Will return a 1
kokkos_has_string=$(if $(findstring $(call uppercase,$2),$(call uppercase,$1)),1,0)
# Returns 1 if the path exists, 0 otherwise
@ -73,11 +73,11 @@ KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
KOKKOS_INTERNAL_ENABLE_CXX23 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++23)
KOKKOS_INTERNAL_ENABLE_CXX2B := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2b)
KOKKOS_INTERNAL_ENABLE_CXX26 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++26)
KOKKOS_INTERNAL_ENABLE_CXX2C := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2c)
# Check for external libraries.
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt)
KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind)
# Check for advanced settings.
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
@ -318,7 +318,6 @@ endif
# Intel based.
KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC)
KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
@ -398,11 +397,9 @@ KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
# IBM based.
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7)
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9)
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
@ -413,22 +410,37 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
endif
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
# Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -573,6 +585,16 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2B), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2B_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX26), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX26_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2C), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2C_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -612,27 +634,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
KOKKOS_LIBS += -lrt
KOKKOS_TPL_LIBRARY_NAMES += rt
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
ifneq ($(KOKKOS_CMAKE), yes)
ifneq ($(MEMKIND_PATH),)
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
KOKKOS_LIBDIRS += -L$(MEMKIND_PATH)/lib
KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib
endif
KOKKOS_LIBS += -lmemkind -lnuma
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
endif
@ -699,10 +700,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC")
else
@ -827,20 +824,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xSSE4.2
KOKKOS_LDFLAGS += -xSSE4.2
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
else
# Assume that this is a really a GNU compiler.
KOKKOS_CXXFLAGS += -msse4.2
KOKKOS_LDFLAGS += -msse4.2
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
@ -1249,7 +1232,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp")
@ -1289,10 +1271,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
@ -1403,11 +1381,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_TPL_LIBRARY_NAMES += hpx
endif
# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning.
ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
endif
# With Cygwin functions such as fdopen and fileno are not defined
# when strict ansi is enabled. strict ansi gets enabled with -std=c++14
# though. So we hard undefine it here. Not sure if that has any bad side effects
@ -1461,6 +1434,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENMP */")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC")
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */")
endif
tmp := $(call desul_append_header, "")
tmp := $(call desul_append_header, "$H""endif")
@ -1493,7 +1472,7 @@ include $(KOKKOS_PATH)/Makefile.targets
kokkos-clean:
rm -f $(KOKKOS_OBJ_LINK) $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a KokkosCore_Config_SetupBackend.hpp \
KokkosCore_Config_FwdBackend.hpp KokkosCore_Config_DeclareBackend.hpp KokkosCore_Config_DeclareBackend.tmp \
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_PostInclude.hpp KokkosCore_Config_PostInclude.tmp KokkosCore_Config_SetupBackend.tmp
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_SetupBackend.tmp
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)

View File

@ -20,8 +20,6 @@ Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ta
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
@ -30,8 +28,6 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
@ -82,8 +78,10 @@ Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array
endif
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
# Threads backend spin-wait object. The recipe must compile the same source
# file that is listed as the prerequisite (Threads/Kokkos_Threads_Spinwait.cpp).
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -123,6 +121,3 @@ Kokkos_OpenACC_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC
Kokkos_OpenACC_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
endif
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp

View File

@ -28,7 +28,7 @@ To start learning about Kokkos:
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
For non-public questions send an email to: *crtrott(at)sandia.gov*
@ -48,10 +48,10 @@ Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citati
# License
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
[![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html)
Under the terms of Contract DE-NA0003525 with NTESS,
the U.S. Government retains certain rights in this software.
The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or
[here](https://github.com/kokkos/kokkos/blob/master/LICENSE).
The full license statement used in all headers is available [here](https://kokkos.org/kokkos-core-wiki/license.html) or
[here](https://github.com/kokkos/kokkos/blob/develop/LICENSE).

12
lib/kokkos/SECURITY.md Normal file
View File

@ -0,0 +1,12 @@
# Reporting Security Issues
To report a security issue, please email
[lebrungrandt@ornl.gov](mailto:lebrungrandt@ornl.gov)
and [crtrott@sandia.gov](mailto:crtrott@sandia.gov)
with a description of the issue, the steps you took to create the issue,
affected versions, and, if known, mitigations for the issue.
Our vulnerability management team will respond within 5 working days of your
email. If the issue is confirmed as a vulnerability, we will open a
Security Advisory and acknowledge your contributions as part of it. This project
follows a 90-day disclosure timeline.

View File

@ -159,7 +159,6 @@ If you don't specify a CUDA build variant in a `packages.yaml` and you build you
> spack install superscience
````
you may end up just getting the default Kokkos (i.e. Serial).
Some examples are included in the `config/yaml` folder for common platforms.
Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
For example, with Kokkos Kernels:
````bash

View File

@ -30,5 +30,5 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
${CMAKE_CURRENT_SOURCE_DIR}
)
KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST)
KOKKOS_LINK_TPL(kokkoscore PUBLIC ONEDPL)

View File

@ -849,18 +849,17 @@ class Random_XorShift64 {
return drand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// Box-muller method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * std::log(S) / S);
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
const double u = drand();
const double v = drand();
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
const double theta = v * two_pi;
return r * Kokkos::cos(theta);
}
KOKKOS_INLINE_FUNCTION
@ -1094,18 +1093,17 @@ class Random_XorShift1024 {
return drand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// Box-muller method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * std::log(S) / S);
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
const double u = drand();
const double v = drand();
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
const double theta = v * two_pi;
return r * Kokkos::cos(theta);
}
KOKKOS_INLINE_FUNCTION
@ -1545,13 +1543,23 @@ template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) {
fill_random(typename ViewType::execution_space{}, a, g, begin, end);
Kokkos::fence(
"fill_random: fence before since no execution space instance provided");
typename ViewType::execution_space exec;
fill_random(exec, a, g, begin, end);
exec.fence(
"fill_random: fence after since no execution space instance provided");
}
template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type range) {
fill_random(typename ViewType::execution_space{}, a, g, 0, range);
Kokkos::fence(
"fill_random: fence before since no execution space instance provided");
typename ViewType::execution_space exec;
fill_random(exec, a, g, 0, range);
exec.fence(
"fill_random: fence after since no execution space instance provided");
}
} // namespace Kokkos

View File

@ -23,6 +23,7 @@
#include "sorting/Kokkos_BinSortPublicAPI.hpp"
#include "sorting/Kokkos_SortPublicAPI.hpp"
#include "sorting/Kokkos_SortByKeyPublicAPI.hpp"
#include "sorting/Kokkos_NestedSortPublicAPI.hpp"
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT

View File

@ -35,7 +35,6 @@
// following the std classification.
// modifying ops
#include "std_algorithms/Kokkos_Swap.hpp"
#include "std_algorithms/Kokkos_IterSwap.hpp"
// non-modifying sequence

View File

@ -0,0 +1,117 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
#define KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
#include "./impl/Kokkos_SortByKeyImpl.hpp"
#include <Kokkos_Core.hpp>
#include <algorithm>
namespace Kokkos::Experimental {
// ---------------------------------------------------------------
// basic overloads
// ---------------------------------------------------------------
// Sorts the rank-1 View `keys` and rearranges `values` accordingly, using the
// execution space instance `exec`. The sort itself is delegated to
// Impl::sort_by_key_device_view_without_comparator.
//
// Compile-time requirements:
//  - both Views pass static_assert_is_admissible_to_kokkos_sort_by_key,
//  - `ExecutionSpace` can access the memory space of both Views,
//  - if both Views have a non-zero static extent, the extents are equal.
//
// Run-time behavior:
//  - calls Kokkos::abort if `values.size() != keys.size()`,
//  - returns immediately if `keys.extent(0) <= 1` (nothing to sort).
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
          class ValuesDataType, class... ValuesProperties>
void sort_by_key(
    const ExecutionSpace& exec,
    const Kokkos::View<KeysDataType, KeysProperties...>& keys,
    const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
  // constraints
  using KeysType   = Kokkos::View<KeysDataType, KeysProperties...>;
  using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
  ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
  ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);

  static_assert(SpaceAccessibility<ExecutionSpace,
                                   typename KeysType::memory_space>::accessible,
                "Kokkos::Experimental::sort_by_key: execution space instance "
                "is not able to access the memory space of the keys View "
                "argument!");
  static_assert(
      SpaceAccessibility<ExecutionSpace,
                         typename ValuesType::memory_space>::accessible,
      "Kokkos::Experimental::sort_by_key: execution space instance is not "
      "able to access the memory space of the values View argument!");

  // A static extent of 0 denotes a dynamic extent, which can only be checked
  // at run time (see below).
  static_assert(KeysType::static_extent(0) == 0 ||
                    ValuesType::static_extent(0) == 0 ||
                    KeysType::static_extent(0) == ValuesType::static_extent(0),
                "Kokkos::Experimental::sort_by_key: keys and values Views "
                "have different static extents!");

  if (values.size() != keys.size())
    Kokkos::abort((std::string("values and keys extents must be the same. The "
                               "values extent is ") +
                   std::to_string(values.size()) + ", and the keys extent is " +
                   std::to_string(keys.size()) + ".")
                      .c_str());

  // Zero or one element is already sorted.
  if (keys.extent(0) <= 1) {
    return;
  }

  ::Kokkos::Impl::sort_by_key_device_view_without_comparator(exec, keys,
                                                             values);
}
// ---------------------------------------------------------------
// overloads supporting a custom comparator
// ---------------------------------------------------------------
// Same as the comparator-less overload, but the ordering of the keys is
// decided by the user-provided `comparator`, which is forwarded unchanged to
// Impl::sort_by_key_device_view_with_comparator.
//
// Compile-time requirements:
//  - both Views pass static_assert_is_admissible_to_kokkos_sort_by_key,
//  - `ExecutionSpace` can access the memory space of both Views,
//  - if both Views have a non-zero static extent, the extents are equal.
//
// Run-time behavior:
//  - calls Kokkos::abort if `values.size() != keys.size()`,
//  - returns immediately if `keys.extent(0) <= 1` (nothing to sort).
template <class ExecutionSpace, class ComparatorType, class KeysDataType,
          class... KeysProperties, class ValuesDataType,
          class... ValuesProperties>
void sort_by_key(
    const ExecutionSpace& exec,
    const Kokkos::View<KeysDataType, KeysProperties...>& keys,
    const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
    const ComparatorType& comparator) {
  // constraints
  using KeysType   = Kokkos::View<KeysDataType, KeysProperties...>;
  using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
  ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
  ::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);

  static_assert(SpaceAccessibility<ExecutionSpace,
                                   typename KeysType::memory_space>::accessible,
                "Kokkos::Experimental::sort_by_key: execution space instance "
                "is not able to access the memory space of the keys View "
                "argument!");
  static_assert(
      SpaceAccessibility<ExecutionSpace,
                         typename ValuesType::memory_space>::accessible,
      "Kokkos::Experimental::sort_by_key: execution space instance is not "
      "able to access the memory space of the values View argument!");

  // A static extent of 0 denotes a dynamic extent, which can only be checked
  // at run time (see below).
  static_assert(KeysType::static_extent(0) == 0 ||
                    ValuesType::static_extent(0) == 0 ||
                    KeysType::static_extent(0) == ValuesType::static_extent(0),
                "Kokkos::Experimental::sort_by_key: keys and values Views "
                "have different static extents!");

  if (values.size() != keys.size())
    Kokkos::abort((std::string("values and keys extents must be the same. The "
                               "values extent is ") +
                   std::to_string(values.size()) + ", and the keys extent is " +
                   std::to_string(keys.size()) + ".")
                      .c_str());

  // Zero or one element is already sorted.
  if (keys.extent(0) <= 1) {
    return;
  }

  ::Kokkos::Impl::sort_by_key_device_view_with_comparator(exec, keys, values,
                                                          comparator);
}
} // namespace Kokkos::Experimental
#endif

View File

@ -29,7 +29,7 @@ namespace Kokkos {
// ---------------------------------------------------------------
template <class ExecutionSpace, class DataType, class... Properties>
void sort([[maybe_unused]] const ExecutionSpace& exec,
void sort(const ExecutionSpace& exec,
const Kokkos::View<DataType, Properties...>& view) {
// constraints
using ViewType = Kokkos::View<DataType, Properties...>;
@ -52,6 +52,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
}
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort without comparator use std::sort");
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last);
@ -82,7 +83,7 @@ void sort(const Kokkos::View<DataType, Properties...>& view) {
// ---------------------------------------------------------------
template <class ExecutionSpace, class ComparatorType, class DataType,
class... Properties>
void sort([[maybe_unused]] const ExecutionSpace& exec,
void sort(const ExecutionSpace& exec,
const Kokkos::View<DataType, Properties...>& view,
const ComparatorType& comparator) {
// constraints
@ -105,6 +106,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
}
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort with comparator use std::sort");
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last, comparator);

View File

@ -18,7 +18,6 @@
#define KOKKOS_NESTED_SORT_IMPL_HPP_
#include <Kokkos_Core.hpp>
#include <std_algorithms/Kokkos_Swap.hpp>
namespace Kokkos {
namespace Experimental {
@ -99,7 +98,7 @@ KOKKOS_INLINE_FUNCTION void sort_nested_impl(
keyView(elem1) = key2;
keyView(elem2) = key1;
if constexpr (!std::is_same_v<ValueViewType, std::nullptr_t>) {
Kokkos::Experimental::swap(valueView(elem1), valueView(elem2));
Kokkos::kokkos_swap(valueView(elem1), valueView(elem2));
}
}
}

View File

@ -0,0 +1,401 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
#define KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
#include <Kokkos_Core.hpp>
#if defined(KOKKOS_ENABLE_CUDA)
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
// .target sm_70 and higher from PTX ISA version 6.4`.
// Also see https://github.com/NVIDIA/cub/pull/170.
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
#define CUB_USE_COOPERATIVE_GROUPS
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like
// this:
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
// error: use of undeclared identifier 'va_printf'
// The exact combination of versions for Clang and Thrust (or CUDA) for this
// failure was not investigated, however even very recent version combination
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
//
// Defining _CubLog here locally allows us to avoid that code path, however
// disabling some debugging diagnostics
#pragma push_macro("_CubLog")
#ifdef _CubLog
#undef _CubLog
#endif
#define _CubLog
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#pragma pop_macro("_CubLog")
#else
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif
#pragma GCC diagnostic pop
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif
#if defined(KOKKOS_ENABLE_ONEDPL) && \
(ONEDPL_VERSION_MAJOR > 2022 || \
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2))
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#endif
namespace Kokkos::Impl {
template <typename T>
constexpr inline bool is_admissible_to_kokkos_sort_by_key =
::Kokkos::is_view<T>::value&& T::rank() == 1 &&
(std::is_same<typename T::traits::array_layout,
Kokkos::LayoutLeft>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutStride>::value);
// Compile-time guard: fails with a static_assert unless
// is_admissible_to_kokkos_sort_by_key<ViewType> is true. The function
// argument is unused; it only serves to deduce ViewType at the call site.
template <class ViewType>
KOKKOS_INLINE_FUNCTION constexpr void
static_assert_is_admissible_to_kokkos_sort_by_key(const ViewType& /* view */) {
static_assert(is_admissible_to_kokkos_sort_by_key<ViewType>,
"Kokkos::sort_by_key only accepts 1D values View with "
"LayoutRight, LayoutLeft or LayoutStride.");
}
// For the fallback implementation for sort_by_key using Kokkos::sort, we need
// to consider if Kokkos::sort defers to the fallback implementation that copies
// the array to the host and uses std::sort, see
// copy_to_host_run_stdsort_copy_back() in impl/Kokkos_SortImpl.hpp. If
// sort_on_device_v is true, we assume that Kokkos::sort doesn't copy data to
// the host and call it directly with the given execution space instance.
// Otherwise, we manually copy the keys and the permutation to the host and
// provide Kokkos::sort with a host execution space.
// The primary template is false; backends opt in through the partial
// specializations in this file.
template <class ExecutionSpace, class Layout>
inline constexpr bool sort_on_device_v = false;
#if defined(KOKKOS_ENABLE_CUDA)
// Opt-in for Kokkos::Cuda: sort_on_device_v is true for every Layout.
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::Cuda, Layout> = true;
// Runs thrust::sort_by_key over `keys` / `values` with an execution policy
// bound to the CUDA stream of `exec`. An optional comparator, if supplied, is
// perfectly forwarded to Thrust as the trailing argument.
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
          class... ValuesProperties, class... MaybeComparator>
void sort_by_key_cudathrust(
    const Kokkos::Cuda& exec,
    const Kokkos::View<KeysDataType, KeysProperties...>& keys,
    const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
    MaybeComparator&&... maybeComparator) {
  // Thrust execution policy tied to the stream of the given instance.
  const auto stream_policy = thrust::cuda::par.on(exec.cuda_stream());

  // Iterator range over the keys and start iterator of the values.
  auto key_begin   = ::Kokkos::Experimental::begin(keys);
  auto key_end     = ::Kokkos::Experimental::end(keys);
  auto value_begin = ::Kokkos::Experimental::begin(values);

  thrust::sort_by_key(stream_policy, key_begin, key_end, value_begin,
                      std::forward<MaybeComparator>(maybeComparator)...);
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
// Opt-in for Kokkos::HIP (only compiled when KOKKOS_ENABLE_ROCTHRUST is
// defined): sort_on_device_v is true for every Layout.
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::HIP, Layout> = true;
// Runs thrust::sort_by_key over `keys` / `values` with an execution policy
// bound to the HIP stream of `exec`. An optional comparator, if supplied, is
// perfectly forwarded to Thrust as the trailing argument.
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
          class... ValuesProperties, class... MaybeComparator>
void sort_by_key_rocthrust(
    const Kokkos::HIP& exec,
    const Kokkos::View<KeysDataType, KeysProperties...>& keys,
    const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
    MaybeComparator&&... maybeComparator) {
  // Thrust execution policy tied to the stream of the given instance.
  const auto stream_policy = thrust::hip::par.on(exec.hip_stream());

  // Iterator range over the keys and start iterator of the values.
  auto key_begin   = ::Kokkos::Experimental::begin(keys);
  auto key_end     = ::Kokkos::Experimental::end(keys);
  auto value_begin = ::Kokkos::Experimental::begin(values);

  thrust::sort_by_key(stream_policy, key_begin, key_end, value_begin,
                      std::forward<MaybeComparator>(maybeComparator)...);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
// Opt-in for SYCL (only compiled when KOKKOS_ENABLE_ONEDPL is defined):
// sort_on_device_v is true only for LayoutLeft and LayoutRight; any other
// Layout (e.g. LayoutStride) yields false.
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::Experimental::SYCL, Layout> =
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
std::is_same_v<Layout, Kokkos::LayoutRight>;
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
// Runs oneapi::dpl::sort_by_key over `keys` / `values` on the SYCL queue of
// `exec`. An optional comparator, if supplied, is perfectly forwarded to
// oneDPL as the trailing argument.
//
// The raw data pointers of BOTH Views are handed to oneDPL, so both Views
// must be contiguous; Kokkos::abort is called if either has stride(0) != 1.
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
          class... ValuesProperties, class... MaybeComparator>
void sort_by_key_onedpl(
    const Kokkos::Experimental::SYCL& exec,
    const Kokkos::View<KeysDataType, KeysProperties...>& keys,
    const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
    MaybeComparator&&... maybeComparator) {
  // Reject the call as soon as one of the two Views is strided: pointer
  // arithmetic below is only valid for unit stride.
  if (keys.stride(0) != 1 || values.stride(0) != 1) {
    Kokkos::abort(
        "SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
  }

  // Can't use Experimental::begin/end here since oneDPL would then assume
  // that the data is on the host.
  auto queue  = exec.sycl_queue();
  auto policy = oneapi::dpl::execution::make_device_policy(queue);
  // Keep the extent in its native unsigned type to avoid narrowing to int.
  const auto n = keys.extent(0);
  oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
                           std::forward<MaybeComparator>(maybeComparator)...);
}
#endif
#endif
template <typename ExecutionSpace, typename PermutationView, typename ViewType>
void applyPermutation(const ExecutionSpace& space,
const PermutationView& permutation,
const ViewType& view) {
static_assert(std::is_integral<typename PermutationView::value_type>::value);
auto view_copy = Kokkos::create_mirror(
Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{},
Kokkos::WithoutInitializing),
view);
Kokkos::deep_copy(space, view_copy, view);
Kokkos::parallel_for(
"Kokkos::sort_by_key_via_sort::permute_" + view.label(),
Kokkos::RangePolicy<ExecutionSpace>(space, 0, view.extent(0)),
KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); });
}
// Generic sort-by-key implemented on top of Kokkos::sort.
//
// Builds an index permutation 0..n-1, sorts that permutation by comparing the
// keys the indices refer to, and finally reorders both `keys` and `values`
// through applyPermutation.
//
// exec            - execution space instance used for allocations, kernels
//                   and copies
// keys            - keys to sort; reordered in place
// values          - values reordered with the same permutation as the keys
// maybeComparator - zero or one comparator applied to pairs of keys; without
//                   it, keys are compared with operator<
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties,
class... MaybeComparator>
void sort_by_key_via_sort(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) {
// At most one comparator may be supplied.
static_assert(sizeof...(MaybeComparator) <= 1);
auto const n = keys.size();
// Index permutation; left uninitialized because the iota kernel below
// writes every entry.
Kokkos::View<unsigned int*, ExecutionSpace> permute(
Kokkos::view_alloc(exec, Kokkos::WithoutInitializing,
"Kokkos::sort_by_key_via_sort::permute"),
n);
// iota: permute(i) = i
Kokkos::parallel_for(
"Kokkos::sort_by_key_via_sort::iota",
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
KOKKOS_LAMBDA(int i) { permute(i) = i; });
// The layout of the permutation view (not of the keys) is what gets passed
// to sort_on_device_v to choose between the host and the device path.
using Layout =
typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout;
if constexpr (!sort_on_device_v<ExecutionSpace, Layout>) {
// Host path: mirror keys and permutation on the host, sort the permutation
// there, then copy it back.
auto host_keys = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
keys);
auto host_permute = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
permute);
Kokkos::deep_copy(exec, host_keys, keys);
Kokkos::deep_copy(exec, host_permute, permute);
// The copies above were issued on `exec`; wait for them before the host
// sort reads the mirrors.
exec.fence("Kokkos::Impl::sort_by_key_via_sort: before host sort");
Kokkos::DefaultHostExecutionSpace host_exec;
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
host_exec, host_permute,
KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); });
} else {
// Pull the single comparator out of the parameter pack.
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(host_keys(i), host_keys(j));
});
}
// Make sure the host sort has finished before its result is copied back.
host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort");
Kokkos::deep_copy(exec, permute, host_permute);
} else {
// Device path: sort the permutation directly in `exec`.
#ifdef KOKKOS_ENABLE_SYCL
// With SYCL enabled the comparator reads the keys through a raw pointer and
// an explicit stride instead of capturing the View.
// NOTE(review): presumably a workaround for using Views inside the
// comparator handed to the SYCL sort backend -- confirm.
auto* raw_keys_in_comparator = keys.data();
auto stride = keys.stride(0);
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) {
return raw_keys_in_comparator[i * stride] <
raw_keys_in_comparator[j * stride];
});
} else {
// Pull the single comparator out of the parameter pack.
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(raw_keys_in_comparator[i * stride],
raw_keys_in_comparator[j * stride]);
});
}
#else
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
exec, permute,
KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); });
} else {
// Pull the single comparator out of the parameter pack.
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(keys(i), keys(j));
});
}
#endif
}
// Apply the sorted permutation to both views; keys are permuted as well, so
// they end up in sorted order.
applyPermutation(exec, permute, keys);
applyPermutation(exec, permute, values);
}
// ------------------------------------------------------
//
// specialize cases for sorting by key without comparator
//
// ------------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
// Overload for Kokkos::Cuda execution space instances: forwards to
// sort_by_key_cudathrust without a comparator.
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
void sort_by_key_device_view_without_comparator(
const Kokkos::Cuda& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
sort_by_key_cudathrust(exec, keys, values);
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
// Overload for Kokkos::HIP execution space instances (only compiled when
// KOKKOS_ENABLE_ROCTHRUST is defined): forwards to sort_by_key_rocthrust
// without a comparator.
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
void sort_by_key_device_view_without_comparator(
const Kokkos::HIP& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
sort_by_key_rocthrust(exec, keys, values);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
// Overload for SYCL execution space instances (only compiled when
// KOKKOS_ENABLE_ONEDPL is defined).
//
// Uses oneDPL's sort_by_key when it is available and both views are
// contiguous (stride(0) == 1); in every other case it uses the generic
// sort_by_key_via_sort implementation.
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
void sort_by_key_device_view_without_comparator(
const Kokkos::Experimental::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values);
else
#endif
// Reached unconditionally when KOKKOS_ONEDPL_HAS_SORT_BY_KEY is not
// defined; otherwise this is the `else` branch of the stride check.
sort_by_key_via_sort(exec, keys, values);
}
#endif
// Fallback case: any execution space without a dedicated overload above.
// Only participates in overload resolution when ExecutionSpace satisfies
// Kokkos::is_execution_space, and forwards to the generic
// sort_by_key_via_sort implementation.
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
sort_by_key_device_view_without_comparator(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
sort_by_key_via_sort(exec, keys, values);
}
// ---------------------------------------------------
//
// specialize cases for sorting by key with comparator
//
// ---------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
// Overload for Kokkos::Cuda execution space instances: forwards keys, values
// and the user-supplied comparator to sort_by_key_cudathrust.
template <class ComparatorType, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
void sort_by_key_device_view_with_comparator(
const Kokkos::Cuda& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
sort_by_key_cudathrust(exec, keys, values, comparator);
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
// Overload for Kokkos::HIP execution space instances (only compiled when
// KOKKOS_ENABLE_ROCTHRUST is defined): forwards keys, values and the
// user-supplied comparator to sort_by_key_rocthrust.
template <class ComparatorType, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
void sort_by_key_device_view_with_comparator(
const Kokkos::HIP& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
sort_by_key_rocthrust(exec, keys, values, comparator);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
// Overload for SYCL execution space instances (only compiled when
// KOKKOS_ENABLE_ONEDPL is defined).
//
// Uses oneDPL's sort_by_key with the given comparator when it is available
// and both views are contiguous (stride(0) == 1); in every other case it
// uses the generic sort_by_key_via_sort implementation.
template <class ComparatorType, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
void sort_by_key_device_view_with_comparator(
const Kokkos::Experimental::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values, comparator);
else
#endif
// Reached unconditionally when KOKKOS_ONEDPL_HAS_SORT_BY_KEY is not
// defined; otherwise this is the `else` branch of the stride check.
sort_by_key_via_sort(exec, keys, values, comparator);
}
#endif
// Fallback case: any execution space without a dedicated overload above.
// Only participates in overload resolution when ExecutionSpace satisfies
// Kokkos::is_execution_space, and forwards keys, values and the comparator
// to the generic sort_by_key_via_sort implementation.
template <class ComparatorType, class ExecutionSpace, class KeysDataType,
class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
sort_by_key_device_view_with_comparator(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
sort_by_key_via_sort(exec, keys, values, comparator);
}
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
} // namespace Kokkos::Impl
#endif

View File

@ -63,6 +63,11 @@
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
@ -184,6 +189,26 @@ void sort_cudathrust(const Cuda& space,
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
// Sorts a rank-1 view with thrust::sort on the HIP stream of `space`.
//
// space           - HIP execution space instance whose stream runs the sort
// view            - rank-1 view to sort in place
// maybeComparator - optional comparator forwarded to thrust::sort
template <class DataType, class... Properties, class... MaybeComparator>
void sort_rocthrust(const HIP& space,
                    const Kokkos::View<DataType, Properties...>& view,
                    MaybeComparator&&... maybeComparator) {
  static_assert(Kokkos::View<DataType, Properties...>::rank == 1,
                "Kokkos::sort: currently only supports rank-1 Views.");

  // Views with zero or one element are already sorted.
  if (view.extent(0) > 1) {
    const auto thrust_policy = thrust::hip::par.on(space.hip_stream());
    auto range_begin         = ::Kokkos::Experimental::begin(view);
    auto range_end           = ::Kokkos::Experimental::end(view);
    thrust::sort(thrust_policy, range_begin, range_end,
                 std::forward<MaybeComparator>(maybeComparator)...);
  }
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties, class... MaybeComparator>
void sort_onedpl(const Kokkos::Experimental::SYCL& space,
@ -274,6 +299,14 @@ void sort_device_view_without_comparator(
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
// Overload for HIP execution space instances (only compiled when
// KOKKOS_ENABLE_ROCTHRUST is defined): forwards to sort_rocthrust without a
// comparator.
template <class DataType, class... Properties>
void sort_device_view_without_comparator(
const HIP& exec, const Kokkos::View<DataType, Properties...>& view) {
sort_rocthrust(exec, view);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties>
void sort_device_view_without_comparator(
@ -320,6 +353,15 @@ void sort_device_view_with_comparator(
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
// Overload for HIP execution space instances (only compiled when
// KOKKOS_ENABLE_ROCTHRUST is defined): forwards the view and the
// user-supplied comparator to sort_rocthrust.
template <class ComparatorType, class DataType, class... Properties>
void sort_device_view_with_comparator(
const HIP& exec, const Kokkos::View<DataType, Properties...>& view,
const ComparatorType& comparator) {
sort_rocthrust(exec, view, comparator);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class ComparatorType, class DataType, class... Properties>
void sort_device_view_with_comparator(

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -66,7 +66,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_backward(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -65,7 +65,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_backward(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy_backward(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -54,7 +54,8 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_if(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
const ::Kokkos::View<DataType2, Properties2...>& dest,
Predicate pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -69,7 +70,8 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_if(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
const ::Kokkos::View<DataType2, Properties2...>& dest,
Predicate pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -96,7 +98,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy_if(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
const ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -51,7 +51,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_n(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -66,7 +66,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_n(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy_n(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -80,7 +80,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -96,7 +96,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -111,7 +111,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2,
const ::Kokkos::View<DataType2, Properties2...>& view2,
BinaryPredicateType predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -128,7 +128,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2,
const ::Kokkos::View<DataType2, Properties2...>& view2,
BinaryPredicateType predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -227,7 +227,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION bool equal(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -243,7 +243,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION bool equal(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2,
const ::Kokkos::View<DataType2, Properties2...>& view2,
BinaryPredicateType predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

View File

@ -19,7 +19,6 @@
#include <Kokkos_Core.hpp>
#include "impl/Kokkos_Constraints.hpp"
#include "Kokkos_Swap.hpp"
namespace Kokkos {
namespace Experimental {
@ -33,7 +32,7 @@ struct StdIterSwapFunctor {
KOKKOS_FUNCTION
void operator()(int i) const {
(void)i;
::Kokkos::Experimental::swap(*m_a, *m_b);
::Kokkos::kokkos_swap(*m_a, *m_b);
}
KOKKOS_FUNCTION
@ -58,6 +57,16 @@ void iter_swap(IteratorType1 a, IteratorType2 b) {
Impl::iter_swap_impl(a, b);
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
// Deprecated alias kept for backward compatibility: swaps `a` and `b` by
// forwarding to ::Kokkos::kokkos_swap.
//
// The exception specification mirrors that of kokkos_swap. The inner
// `noexcept(...)` is the noexcept *operator*, which turns the call expression
// into the boolean constant the outer noexcept *specifier* requires; passing
// the call expression itself is not a valid specifier argument.
template <class T>
KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::kokkos_swap instead!")
KOKKOS_FUNCTION
void swap(T& a, T& b) noexcept(
    noexcept(::Kokkos::kokkos_swap(std::declval<T&>(), std::declval<T&>()))) {
  ::Kokkos::kokkos_swap(a, b);
}
#endif
} // namespace Experimental
} // namespace Kokkos

View File

@ -54,7 +54,7 @@ template <
bool lexicographical_compare(
const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -71,7 +71,7 @@ template <
bool lexicographical_compare(
const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -112,7 +112,8 @@ template <
bool lexicographical_compare(
const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
const ::Kokkos::View<DataType2, Properties2...>& view2,
ComparatorType comp) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -129,7 +130,8 @@ template <
bool lexicographical_compare(
const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
const ::Kokkos::View<DataType2, Properties2...>& view2,
ComparatorType comp) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -161,7 +163,7 @@ template <class TeamHandleType, class DataType1, class... Properties1,
KOKKOS_FUNCTION bool lexicographical_compare(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -187,7 +189,8 @@ template <class TeamHandleType, class DataType1, class... Properties1,
KOKKOS_FUNCTION bool lexicographical_compare(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
const ::Kokkos::View<DataType2, Properties2...>& view2,
ComparatorType comp) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -64,7 +64,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto move(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -41,7 +41,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move_backward(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -65,7 +65,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move_backward(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto move_backward(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto reverse_copy(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -65,7 +65,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto reverse_copy(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto reverse_copy(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -40,7 +40,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto swap_ranges(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -64,7 +64,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto swap_ranges(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto swap_ranges(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -58,7 +58,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto transform(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest,
const ::Kokkos::View<DataType2, Properties2...>& dest,
UnaryOperation unary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -73,7 +73,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto transform(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest,
const ::Kokkos::View<DataType2, Properties2...>& dest,
UnaryOperation unary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -119,7 +119,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
auto transform(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source1,
const ::Kokkos::View<DataType2, Properties2...>& source2,
::Kokkos::View<DataType3, Properties3...>& dest,
const ::Kokkos::View<DataType3, Properties3...>& dest,
BinaryOperation binary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
@ -137,7 +137,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
auto transform(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source1,
const ::Kokkos::View<DataType2, Properties2...>& source2,
::Kokkos::View<DataType3, Properties3...>& dest,
const ::Kokkos::View<DataType3, Properties3...>& dest,
BinaryOperation binary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
@ -174,7 +174,8 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto transform(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, UnaryOperation unary_op) {
const ::Kokkos::View<DataType2, Properties2...>& dest,
UnaryOperation unary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -207,7 +208,7 @@ KOKKOS_FUNCTION auto transform(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source1,
const ::Kokkos::View<DataType2, Properties2...>& source2,
::Kokkos::View<DataType3, Properties3...>& dest,
const ::Kokkos::View<DataType3, Properties3...>& dest,
BinaryOperation binary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);

View File

@ -47,8 +47,9 @@ struct ExclusiveScanDefaultFunctorForKnownNeutralElement {
KOKKOS_FUNCTION
void operator()(const IndexType i, ValueType& update,
const bool final_pass) const {
const auto tmp = m_first_from[i];
if (final_pass) m_first_dest[i] = update + m_init_value;
update += m_first_from[i];
update += tmp;
}
};
@ -73,6 +74,7 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
KOKKOS_FUNCTION
void operator()(const IndexType i, value_type& update,
const bool final_pass) const {
const auto tmp = value_type{m_first_from[i], false};
if (final_pass) {
if (i == 0) {
m_first_dest[i] = m_init_value;
@ -81,7 +83,6 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
}
}
const auto tmp = value_type{m_first_from[i], false};
this->join(update, tmp);
}
@ -132,6 +133,7 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
KOKKOS_FUNCTION
void operator()(const IndexType i, value_type& update,
const bool final_pass) const {
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
if (final_pass) {
if (i == 0) {
// for both ExclusiveScan and TransformExclusiveScan,
@ -142,7 +144,6 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
}
}
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
this->join(update, tmp);
}
@ -190,6 +191,7 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
KOKKOS_FUNCTION
void operator()(const IndexType i, ValueType& update,
const bool final_pass) const {
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
if (final_pass) {
if (i == 0) {
// for both ExclusiveScan and TransformExclusiveScan,
@ -200,7 +202,6 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
}
}
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
this->join(update, tmp);
}

View File

@ -46,15 +46,14 @@ struct StdRemoveIfStage1Functor {
void operator()(const IndexType i, IndexType& update,
const bool final_pass) const {
auto& myval = m_first_from[i];
if (final_pass) {
if (!m_must_remove(myval)) {
if (!m_must_remove(myval)) {
if (final_pass) {
// calling move here is ok because we are inside final pass
// we are calling move assign as specified by the std
m_first_dest[update] = std::move(myval);
}
}
if (!m_must_remove(myval)) {
update += 1;
}
}
@ -108,7 +107,9 @@ IteratorType remove_if_exespace_impl(const std::string& label,
// create helper tmp view
using value_type = typename IteratorType::value_type;
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
tmp_view_type tmp_view("std_remove_if_tmp_view", keep_count);
tmp_view_type tmp_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, ex,
"std_remove_if_tmp_view"),
keep_count);
using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
// in stage 1, *move* all elements to keep from original range to tmp

View File

@ -21,7 +21,6 @@
#include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp"
#include <std_algorithms/Kokkos_Distance.hpp>
#include <std_algorithms/Kokkos_Swap.hpp>
#include <string>
namespace Kokkos {
@ -39,7 +38,7 @@ struct StdReverseFunctor {
KOKKOS_FUNCTION
void operator()(index_type i) const {
::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]);
::Kokkos::kokkos_swap(m_first[i], m_last[-i - 1]);
}
KOKKOS_FUNCTION

View File

@ -126,10 +126,11 @@ KOKKOS_FUNCTION IteratorType shift_left_team_impl(
// execution space impl because for this team impl we are
// within a parallel region, so for now we solve serially
const std::size_t numElementsToMove =
using difference_type = typename IteratorType::difference_type;
const difference_type numElementsToMove =
::Kokkos::Experimental::distance(first + n, last);
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
for (std::size_t i = 0; i < numElementsToMove; ++i) {
for (difference_type i = 0; i < numElementsToMove; ++i) {
first[i] = std::move(first[i + n]);
}
});

View File

@ -103,26 +103,6 @@ IteratorType shift_right_exespace_impl(
return first + n;
}
template <class Iterator>
struct StdShiftRightTeamSingleFunctor {
Iterator m_first;
Iterator m_last;
std::size_t m_shift;
KOKKOS_FUNCTION
void operator()() const {
// the impl function calling this functor guarantees that
// - m_shift is non-negative
// - m_first, m_last identify a valid range with m_last > m_first
// - m_shift is less than m_last - m_first
// so I can safely use std::size_t here
}
KOKKOS_FUNCTION
StdShiftRightTeamSingleFunctor(Iterator _first, Iterator _last, std::size_t n)
: m_first(std::move(_first)), m_last(std::move(_last)), m_shift(n) {}
};
template <class TeamHandleType, class IteratorType>
KOKKOS_FUNCTION IteratorType shift_right_team_impl(
const TeamHandleType& teamHandle, IteratorType first, IteratorType last,
@ -145,10 +125,11 @@ KOKKOS_FUNCTION IteratorType shift_right_team_impl(
// execution space impl because for this team impl we are
// within a parallel region, so for now we solve serially
const std::size_t numElementsToMove =
using difference_type = typename IteratorType::difference_type;
const difference_type numElementsToMove =
::Kokkos::Experimental::distance(first, last - n);
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
for (std::size_t i = 0; i < numElementsToMove; ++i) {
for (difference_type i = 0; i < numElementsToMove; ++i) {
last[-i - 1] = std::move(last[-n - i - 1]);
}
});

View File

@ -21,7 +21,6 @@
#include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp"
#include <std_algorithms/Kokkos_Distance.hpp>
#include <std_algorithms/Kokkos_Swap.hpp>
#include <string>
namespace Kokkos {
@ -36,7 +35,7 @@ struct StdSwapRangesFunctor {
KOKKOS_FUNCTION
void operator()(index_type i) const {
::Kokkos::Experimental::swap(m_first1[i], m_first2[i]);
::Kokkos::kokkos_swap(m_first1[i], m_first2[i]);
}
KOKKOS_FUNCTION

View File

@ -105,7 +105,9 @@ IteratorType unique_exespace_impl(const std::string& label,
// using the same algorithm used for unique_copy but we now move things
using value_type = typename IteratorType::value_type;
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
tmp_view_type tmp_view("std_unique_tmp_view", num_elements_to_explore);
tmp_view_type tmp_view(Kokkos::view_alloc(ex, Kokkos::WithoutInitializing,
"std_unique_tmp_view"),
num_elements_to_explore);
// scan extent is: num_elements_to_explore - 1
// for same reason as the one explained in unique_copy

View File

@ -25,6 +25,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
set(ALGO_SORT_SOURCES)
foreach(SOURCE_Input
TestSort
TestSortByKey
TestSortCustomComp
TestBinSortA
TestBinSortB
@ -57,35 +58,37 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
configure_file(${dir}/dummy.cpp ${file})
list(APPEND ALGO_RANDOM_SOURCES ${file})
endforeach()
endif()
endforeach()
# ------------------------------------------
# std set A
# ------------------------------------------
set(STDALGO_SOURCES_A)
foreach(Name
# ------------------------------------------
# std set A
# ------------------------------------------
set(STDALGO_SOURCES_A)
foreach(Name
StdReducers
StdAlgorithmsConstraints
RandomAccessIterator
)
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set B
# ------------------------------------------
set(STDALGO_SOURCES_B)
foreach(Name
# ------------------------------------------
# std set B
# ------------------------------------------
set(STDALGO_SOURCES_B)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsMinMaxElementOps
)
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set C
# ------------------------------------------
set(STDALGO_SOURCES_C)
foreach(Name
# ------------------------------------------
# std set C
# ------------------------------------------
set(STDALGO_SOURCES_C)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsLexicographicalCompare
StdAlgorithmsForEach
@ -100,15 +103,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsSearch_n
StdAlgorithmsMismatch
StdAlgorithmsMoveBackward
)
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set D
# ------------------------------------------
set(STDALGO_SOURCES_D)
foreach(Name
# ------------------------------------------
# std set D
# ------------------------------------------
set(STDALGO_SOURCES_D)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsModOps
StdAlgorithmsModSeqOps
@ -128,15 +131,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsReverse
StdAlgorithmsShiftLeft
StdAlgorithmsShiftRight
)
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set E
# ------------------------------------------
set(STDALGO_SOURCES_E)
foreach(Name
# ------------------------------------------
# std set E
# ------------------------------------------
set(STDALGO_SOURCES_E)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsIsSorted
StdAlgorithmsIsSortedUntil
@ -149,83 +152,83 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTransformUnaryOp
StdAlgorithmsTransformExclusiveScan
StdAlgorithmsTransformInclusiveScan
)
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team Q
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_Q)
foreach(Name
# ------------------------------------------
# std team Q
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_Q)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamInclusiveScan
StdAlgorithmsTeamTransformInclusiveScan
)
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team P
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_P)
foreach(Name
# ------------------------------------------
# std team P
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_P)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamExclusiveScan
StdAlgorithmsTeamTransformExclusiveScan
)
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team M
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_M)
foreach(Name
# ------------------------------------------
# std team M
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_M)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamTransformUnaryOp
StdAlgorithmsTeamTransformBinaryOp
StdAlgorithmsTeamGenerate
StdAlgorithmsTeamGenerate_n
StdAlgorithmsTeamSwapRanges
)
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team L
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_L)
foreach(Name
# ------------------------------------------
# std team L
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_L)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamIsSorted
StdAlgorithmsTeamIsSortedUntil
StdAlgorithmsTeamIsPartitioned
StdAlgorithmsTeamPartitionCopy
StdAlgorithmsTeamPartitionPoint
)
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team I
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_I)
foreach(Name
# ------------------------------------------
# std team I
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_I)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamUnique
StdAlgorithmsTeamAdjacentDifference
StdAlgorithmsTeamReduce
StdAlgorithmsTeamTransformReduce
)
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team H
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_H)
foreach(Name
# ------------------------------------------
# std team H
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_H)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamCopy
StdAlgorithmsTeamCopy_n
@ -236,43 +239,43 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamRemoveIf
StdAlgorithmsTeamRemoveCopy
StdAlgorithmsTeamRemoveCopyIf
)
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team G
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_G)
foreach(Name
# ------------------------------------------
# std team G
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_G)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamMove
StdAlgorithmsTeamMoveBackward
StdAlgorithmsTeamShiftLeft
StdAlgorithmsTeamShiftRight
)
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team F
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_F)
foreach(Name
# ------------------------------------------
# std team F
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_F)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamReverse
StdAlgorithmsTeamReverseCopy
StdAlgorithmsTeamRotate
StdAlgorithmsTeamRotateCopy
)
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team E
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_E)
foreach(Name
# ------------------------------------------
# std team E
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_E)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamFill
StdAlgorithmsTeamFill_n
@ -280,28 +283,28 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamReplaceIf
StdAlgorithmsTeamReplaceCopy
StdAlgorithmsTeamReplaceCopyIf
)
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team D
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_D)
foreach(Name
# ------------------------------------------
# std team D
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_D)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamMinElement
StdAlgorithmsTeamMaxElement
StdAlgorithmsTeamMinMaxElement
)
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team C
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_C)
foreach(Name
# ------------------------------------------
# std team C
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_C)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamFind
StdAlgorithmsTeamFindIf
@ -310,29 +313,29 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamAnyOf
StdAlgorithmsTeamNoneOf
StdAlgorithmsTeamSearchN
)
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team B
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_B)
foreach(Name
# ------------------------------------------
# std team B
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_B)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamEqual
StdAlgorithmsTeamSearch
StdAlgorithmsTeamFindEnd
StdAlgorithmsTeamFindFirstOf
)
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team A
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_A)
foreach(Name
# ------------------------------------------
# std team A
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_A)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamAdjacentFind
StdAlgorithmsTeamCount
@ -341,11 +344,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamForEachN
StdAlgorithmsTeamLexicographicalCompare
StdAlgorithmsTeamMismatch
)
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
endforeach()
endif()
)
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
endforeach()
# FIXME_OPENMPTARGET - remove sort test as it leads to ICE with clang/16 and above at compile time.

View File

@ -27,13 +27,13 @@ TARGETS =
tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
$(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\
$(shell echo "\#include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
$(shell echo "\#include <TestRandom.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestSort.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestBinSortA.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestBinSortB.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestNestedSort.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
$(shell echo "$(H)include <TestRandom.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestSort.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestBinSortA.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestBinSortB.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestNestedSort.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
) \
)

View File

@ -0,0 +1,241 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_Sort.hpp>
#include <utility> // pair
namespace Test {
namespace SortImpl {
// Comparator functor: returns true when the first argument compares less
// than the second via operator<. Callable from host and device code.
struct Less {
  template <class T>
  KOKKOS_INLINE_FUNCTION bool operator()(const T &a, const T &b) const {
    const bool first_is_smaller = a < b;
    return first_is_smaller;
  }
};
// Comparator functor: returns true when the first argument compares greater
// than the second via operator>. Passing it to sort_by_key requests a
// descending order in the tests below.
struct Greater {
  template <class T>
  KOKKOS_INLINE_FUNCTION bool operator()(const T &a, const T &b) const {
    const bool first_is_larger = a > b;
    return first_is_larger;
  }
};
// Reduction functor that counts violations of a sort_by_key result.
//
//   keys       - the keys after sorting
//   keys_orig  - a copy of the keys taken before sorting
//   permute    - the values that were sorted alongside the keys; the check
//                uses permute(i) as an index into keys_orig
//   comparator - ordering used for the sort (defaults to Less)
//
// For each index i the functor adds one to the reduction value when
//   (a) i has a successor and comparator(keys(i + 1), keys(i)) holds, i.e.
//       the adjacent pair is out of order, and
//   (b) keys(i) differs from keys_orig(permute(i)), i.e. the value did not
//       travel with its key.
// A final count of zero therefore means both properties hold everywhere.
template <class ExecutionSpace, class Keys, class Permute,
          class Comparator = Less>
struct is_sorted_by_key_struct {
  Keys keys;
  Keys keys_orig;
  Permute permute;
  Comparator comparator;

  is_sorted_by_key_struct(Keys sorted_keys, Keys original_keys,
                          Permute permutation,
                          Comparator cmp = Comparator{})
      : keys(sorted_keys),
        keys_orig(original_keys),
        permute(permutation),
        comparator(cmp) {}

  KOKKOS_INLINE_FUNCTION
  void operator()(int i, unsigned int &count) const {
    // (a) ordering check, skipped for the last element
    const bool has_successor = i < keys.extent_int(0) - 1;
    if (has_successor && comparator(keys(i + 1), keys(i))) {
      ++count;
    }

    // (b) key/value association check
    if (keys(i) != keys_orig(permute(i))) {
      ++count;
    }
  }
};
template <typename ExecutionSpace, typename ViewType>
void iota(ExecutionSpace const &space, ViewType const &v,
typename ViewType::value_type value = 0) {
using ValueType = typename ViewType::value_type;
Kokkos::parallel_for(
"ArborX::Algorithms::iota",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)),
KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; });
}
} // namespace SortImpl
// sort_by_key must accept zero-length key and value views without throwing.
TEST(TEST_CATEGORY, SortByKeyEmptyView) {
  using ExecutionSpace = TEST_EXECSPACE;
  using KeyView        = Kokkos::View<int *, ExecutionSpace>;
  using ValueView      = Kokkos::View<float *, ExecutionSpace>;

  // the element types are arbitrary; int/float are as good as any other
  KeyView keys("keys", 0);
  ValueView values("values", 0);

  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
}
// Sorts several small key sets with the default ordering while carrying an
// iota-initialized permutation as values, then verifies on the execution
// space that the keys are ordered and that permute(i) still points at the
// original position of keys(i).
TEST(TEST_CATEGORY, SortByKey) {
  using ExecutionSpace = TEST_EXECSPACE;
  using MemorySpace    = typename ExecutionSpace::memory_space;
  using HostInputView =
      Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;

  ExecutionSpace space{};

  // each vector is taken by value: the unmanaged view below needs a mutable
  // pointer into it
  for (auto host_keys : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
                         std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
                         std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
                         std::vector<int>{15, 5, 11, 3, 4, 8}}) {
    auto const n = host_keys.size();

    // wrap the host data and obtain a view of it in the test memory space
    auto keys = Kokkos::create_mirror_view_and_copy(
        MemorySpace{}, HostInputView(host_keys.data(), n));

    // snapshot of the unsorted keys, used as reference by the checker
    auto keys_orig = Kokkos::create_mirror(space, keys);
    Kokkos::deep_copy(space, keys_orig, keys);

    // values = 0, 1, ..., n-1
    Kokkos::View<int *, ExecutionSpace> permute("permute", n);
    SortImpl::iota(space, permute);

    Kokkos::Experimental::sort_by_key(space, keys, permute);

    using Checker =
        SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
                                          decltype(permute)>;
    unsigned int sort_fails = 0;
    Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                            Checker(keys, keys_orig, permute), sort_fails);

    ASSERT_EQ(sort_fails, 0u);
  }
}
// Same scenario as SortByKey, but the sort and the verification both use
// SortImpl::Greater as the ordering.
TEST(TEST_CATEGORY, SortByKeyWithComparator) {
  using ExecutionSpace = TEST_EXECSPACE;
  using MemorySpace    = typename ExecutionSpace::memory_space;
  using HostInputView =
      Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;

  ExecutionSpace space{};
  SortImpl::Greater comparator;

  // each vector is taken by value: the unmanaged view below needs a mutable
  // pointer into it
  for (auto host_keys : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
                         std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
                         std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
                         std::vector<int>{15, 5, 11, 3, 4, 8}}) {
    auto const n = host_keys.size();

    // wrap the host data and obtain a view of it in the test memory space
    auto keys = Kokkos::create_mirror_view_and_copy(
        MemorySpace{}, HostInputView(host_keys.data(), n));

    // snapshot of the unsorted keys, used as reference by the checker
    auto keys_orig = Kokkos::create_mirror(space, keys);
    Kokkos::deep_copy(space, keys_orig, keys);

    // values = 0, 1, ..., n-1
    Kokkos::View<int *, ExecutionSpace> permute("permute", n);
    SortImpl::iota(space, permute);

    Kokkos::Experimental::sort_by_key(space, keys, permute, comparator);

    using Checker =
        SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
                                          decltype(permute),
                                          SortImpl::Greater>;
    unsigned int sort_fails = 0;
    Kokkos::parallel_reduce(
        Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
        Checker(keys, keys_orig, permute, comparator), sort_fails);

    ASSERT_EQ(sort_fails, 0u);
  }
}
// sort_by_key must accept keys with a compile-time extent, paired with
// values that have either a compile-time or a run-time extent of the same
// length. Only the absence of exceptions is checked.
TEST(TEST_CATEGORY, SortByKeyStaticExtents) {
  using ExecutionSpace = TEST_EXECSPACE;
  using StaticView     = Kokkos::View<int[10], ExecutionSpace>;
  using DynamicView    = Kokkos::View<int *, ExecutionSpace>;

  ExecutionSpace space{};

  StaticView keys("keys");

  // static keys + static values
  StaticView values_static("values_static");
  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(space, keys, values_static));

  // static keys + dynamic values
  DynamicView values_dynamic("values_dynamic", 10);
  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(space, keys, values_dynamic));
}
template <typename ExecutionSpace, typename Keys, typename Values>
void buildViewsForStrided(ExecutionSpace const &space, int n, Keys &keys,
Values &values) {
Kokkos::parallel_for(
"create_data",
Kokkos::MDRangePolicy<Kokkos::Rank<3>, ExecutionSpace>(space, {0, 0, 0},
{n, n, n}),
KOKKOS_LAMBDA(int i, int j, int k) {
keys(i, j, k) = n - i;
values(i, j, k) = j;
});
}
// Runs sort_by_key on rank-1 slices taken out of rank-3 views.
// buildViewsForStrided stores n - i in keys and j in values, so the key
// slice along the first index is n, n-1, ..., 1 and the value slice along
// the second index is 0, 1, ..., n-1; the latter doubles as the permutation
// consumed by the checker.
TEST(TEST_CATEGORY, SortByKeyWithStrides) {
  using ExecutionSpace = TEST_EXECSPACE;
  using View3D         = Kokkos::View<int ***, ExecutionSpace>;

  ExecutionSpace space{};

  auto const n = 10;

  View3D keys("keys", n, n, n);
  View3D values("values", n, n, n);
  buildViewsForStrided(space, n, keys, values);

  // slices along different dimensions of the two views
  auto keys_sub   = Kokkos::subview(keys, Kokkos::ALL(), 1, 2);
  auto values_sub = Kokkos::subview(values, 4, Kokkos::ALL(), 6);

  // snapshot of the unsorted key slice, used as reference by the checker
  auto keys_orig = Kokkos::create_mirror(space, keys_sub);
  Kokkos::deep_copy(space, keys_orig, keys_sub);

  Kokkos::Experimental::sort_by_key(space, keys_sub, values_sub);

  using Checker =
      SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys_sub),
                                        decltype(values_sub)>;
  unsigned int sort_fails = 0;
  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                          Checker(keys_sub, keys_orig, values_sub),
                          sort_fails);

  ASSERT_EQ(sort_fails, 0u);
}
// Death test: calling sort_by_key with three keys but a single value is
// expected to terminate the process with a message about mismatching
// extents, both with the default ordering and with an explicit comparator.
TEST(TEST_CATEGORY, SortByKeyKeysLargerThanValues) {
  using ExecutionSpace = TEST_EXECSPACE;
  using KeyView        = Kokkos::View<int *, ExecutionSpace>;
  using ValueView      = Kokkos::View<float *, ExecutionSpace>;

  // the element types are arbitrary; int/float are as good as any other
  KeyView keys("keys", 3);
  ValueView values("values", 1);

  // overload without comparator
  ASSERT_DEATH(
      Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values),
      "values and keys extents must be the same");

  // overload with comparator
  ASSERT_DEATH(Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values,
                                                 SortImpl::Greater{}),
               "values and keys extents must be the same");
}
} // namespace Test
#endif

View File

@ -239,16 +239,8 @@ KOKKOS_FUNCTION bool team_members_have_matching_result(
// set accum to 1 if a mismatch is found
const bool mismatch = memberValue != target;
int accum = static_cast<int>(mismatch);
// FIXME_OPENMPTARGET: team API does not meet the TeamHandle concept and
// ignores the reducer passed
#if defined KOKKOS_ENABLE_OPENMPTARGET
Kokkos::Sum<int> dummyReducer(accum);
const auto result = teamHandle.team_reduce(accum, dummyReducer);
return (result == 0);
#else
teamHandle.team_reduce(Kokkos::Sum<int>(accum));
return (accum == 0);
#endif
}
template <class ValueType1, class ValueType2>

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -132,47 +133,6 @@ void my_host_exclusive_scan(it1 first, it1 last, it2 dest, ValType init,
}
}
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
void verify_data(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
ValueType init_value, BinaryOp bop) {
//! always careful because views might not be deep copyable
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
auto data_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
using gold_view_value_type = typename ViewType2::value_type;
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
"goldh", data_view.extent(0));
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
KE::begin(gold_h), init_value, bop);
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
auto test_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error =
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
<< std::endl;
}
EXPECT_LT(error, 1e-10);
}
}
}
}
template <class ValueType>
struct MultiplyFunctor {
KOKKOS_INLINE_FUNCTION
@ -189,107 +149,153 @@ struct SumFunctor {
}
};
struct VerifyData {
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
void operator()(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
ValueType init_value, BinaryOp bop) {
//! always careful because views might not be deep copyable
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
auto data_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
using gold_view_value_type = typename ViewType2::value_type;
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
"goldh", data_view.extent(0));
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
KE::begin(gold_h), init_value, bop);
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
auto test_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error =
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
<< static_cast<double>(test_view_h(i)) << " "
<< static_cast<double>(gold_h(i));
}
}
}
}
template <class ViewType1, class ViewType2, class ValueType>
void operator()(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
ValueType init_value) {
(*this)(data_view, test_view, init_value, SumFunctor<ValueType>());
}
};
std::string value_type_to_string(int) { return "int"; }
std::string value_type_to_string(double) { return "double"; }
template <class Tag, class ValueType, class InfoType>
void run_single_scenario_default_op(const InfoType& scenario_info,
ValueType init_value) {
using default_op = SumFunctor<ValueType>;
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
OpOrEmpty... empty_or_op) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "exclusive_scan default op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", "
// << "init = " << init_value << std::endl;
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
fill_view(view_from, name);
// view_dest is filled with zeros every time before calling the algorithm to
// ensure the algorithm does something meaningful
{
fill_zero(view_dest);
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value);
init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value);
init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value);
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
init_value);
init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
Kokkos::fence();
}
template <class Tag, class ValueType, class InfoType, class BinaryOp>
void run_single_scenario_custom_op(const InfoType& scenario_info,
ValueType init_value, BinaryOp bop) {
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
void run_single_scenario_inplace(const InfoType& scenario_info,
ValueType init_value,
OpOrEmpty... empty_or_op) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "exclusive_scan custom op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", "
// << "init = " << init_value << std::endl;
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
fill_view(view_from, name);
// since here we call the in-place operation, we need to use two views:
// view1: filled according to what the scenario asks for and is not modified
// view2: filled according to what the scenario asks for and used for the
// in-place op. Therefore, after the op is done, view2 should contain the
// result of doing exclusive scan. NOTE: view2 is filled below every time
// because the algorithm acts in place.
auto view1 =
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view1");
fill_view(view1, name);
auto view2 =
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view2");
{
fill_zero(view_dest);
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
KE::begin(view2), init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view2),
KE::cend(view2), KE::begin(view2), init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r =
KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan(exespace(), view2, view2, init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan("label", exespace(), view2, view2, init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
Kokkos::fence();
@ -303,34 +309,39 @@ void run_exclusive_scan_all_scenarios() {
{"medium", 1103}, {"large", 10513}};
for (const auto& it : scenarios) {
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{0});
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{1});
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{-2});
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{3});
run_single_scenario<Tag, ValueType>(it, ValueType{0});
run_single_scenario<Tag, ValueType>(it, ValueType{1});
run_single_scenario<Tag, ValueType>(it, ValueType{-2});
run_single_scenario<Tag, ValueType>(it, ValueType{3});
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2});
#if !defined KOKKOS_ENABLE_OPENMPTARGET
// custom multiply op is only run for small views otherwise it overflows
if (it.first == "small-a" || it.first == "small-b") {
using custom_bop_t = MultiplyFunctor<ValueType>;
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
}
using custom_bop_t = SumFunctor<ValueType>;
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
#endif
}
}

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -143,51 +144,6 @@ void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop,
}
}
template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
void verify_data(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
BinaryOp bop, Args... args /* copy on purpose */) {
//! always careful because views might not be deep copyable
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
auto data_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
using gold_view_value_type = typename ViewType2::value_type;
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
"goldh", data_view.extent(0));
my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
KE::begin(gold_h), bop, args...);
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
auto test_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
const auto ext = test_view_h.extent(0);
if (ext > 0) {
for (std::size_t i = 0; i < ext; ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error =
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
<< std::endl;
}
EXPECT_LT(error, 1e-10);
}
}
// std::cout << " last el: " << test_view_h(ext-1) << std::endl;
}
}
template <class ValueType>
struct MultiplyFunctor {
KOKKOS_INLINE_FUNCTION
@ -204,107 +160,151 @@ struct SumFunctor {
}
};
// Callable checker used by the inclusive_scan tests: it recomputes the scan
// on the host with my_host_inclusive_scan and compares the result, element
// by element, against the view produced by the algorithm under test.
struct VerifyData {
  // data_view: the input data the scan was run on.
  // test_view: the view holding the result to be checked.
  // bop, args...: forwarded unchanged to my_host_inclusive_scan to build the
  //               reference ("gold") result.
  // Values are compared exactly when the tested value type is int, and with
  // an absolute tolerance of 1e-10 otherwise.
  template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
  void operator()(ViewType1 data_view,  // contains data
                  ViewType2 test_view,  // the view to test
                  BinaryOp bop, Args... args /* copy on purpose */) {
    //! always careful because views might not be deep copyable

    auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
    auto data_view_h =
        create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);

    // reference result computed on the host
    using gold_view_value_type = typename ViewType2::value_type;
    Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
        "goldh", data_view.extent(0));
    my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
                           KE::begin(gold_h), bop, args...);

    auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
    auto test_view_h =
        create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);

    const auto ext = test_view_h.extent(0);
    if (ext > 0) {
      for (std::size_t i = 0; i < ext; ++i) {
        if (std::is_same<gold_view_value_type, int>::value) {
          ASSERT_EQ(gold_h(i), test_view_h(i));
        } else {
          const auto error =
              std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
          // message layout: index, error, tested value, gold value; the
          // separator after `error` keeps it from fusing with the next number
          ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
                                  << " " << static_cast<double>(test_view_h(i))
                                  << " " << static_cast<double>(gold_h(i));
        }
      }
    }
  }

  // Overload for scans run without an explicit binary op: the reference
  // result is built with SumFunctor over the data view's value type.
  template <class ViewType1, class ViewType2>
  void operator()(ViewType1 data_view,  // contains data
                  ViewType2 test_view)  // the view to test
  {
    using value_type = typename ViewType1::non_const_value_type;
    (*this)(data_view, test_view, SumFunctor<value_type>());
  }
};
std::string value_type_to_string(int) { return "int"; }
std::string value_type_to_string(double) { return "double"; }
template <class Tag, class ValueType, class InfoType>
void run_single_scenario_default_op(const InfoType& scenario_info) {
using default_op = SumFunctor<ValueType>;
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario(const InfoType& scenario_info,
Args... args /* copy on purpose */) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "inclusive_scan default op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << std::endl;
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
fill_view(view_from, name);
// view_dest is filled with zeros before calling the algorithm every time to
// ensure the algorithm does something meaningful
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest));
auto r =
KE::inclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest));
auto r =
KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), view_from, view_dest);
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest);
auto r =
KE::inclusive_scan("label", exespace(), view_from, view_dest, args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
Kokkos::fence();
}
template <class Tag, class ValueType, class InfoType, class BinaryOp,
class... Args>
void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop,
Args... args /* copy on purpose */) {
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario_inplace(const InfoType& scenario_info,
Args... args /* copy on purpose */) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// if (1 == sizeof...(Args)) {
// std::cout << "inclusive_scan custom op and init value: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", " << std::endl;
// } else {
// std::cout << "inclusive_scan custom op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", " << std::endl;
// }
// since here we call the in-place operation, we need to use two views:
// view1: filled according to what the scenario asks for and is not modified
// view2: filled according to what the scenario asks for and used for the
// in-place op. Therefore, after the op is done, view2 should contain the
// result of doing inclusive scan. NOTE: view2 is filled below every time
// because the algorithm acts in place
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
fill_view(view_from, name);
auto view1 =
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view1");
fill_view(view1, name);
auto view2 =
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view2");
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), bop,
args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
KE::begin(view2), args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), bop,
args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view2),
KE::cend(view2), KE::begin(view2), args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan(exespace(), view2, view2, args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop,
args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan("label", exespace(), view2, view2, args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
Kokkos::fence();
@ -318,27 +318,35 @@ void run_inclusive_scan_all_scenarios() {
{"medium-a", 313}, {"medium-b", 1103}, {"large", 10513}};
for (const auto& it : scenarios) {
run_single_scenario_default_op<Tag, ValueType>(it);
run_single_scenario<Tag, ValueType>(it);
run_single_scenario_inplace<Tag, ValueType>(it);
#if !defined KOKKOS_ENABLE_OPENMPTARGET
// the sum custom op is always run
using sum_binary_op = SumFunctor<ValueType>;
sum_binary_op sbop;
run_single_scenario_custom_op<Tag, ValueType>(it, sbop);
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{0});
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{1});
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{-2});
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{3});
run_single_scenario<Tag, ValueType>(it, sbop);
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{0});
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{1});
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{-2});
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{3});
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{-2});
// custom multiply only for small views to avoid overflows
if (it.first == "small-a" || it.first == "small-b") {
using mult_binary_op = MultiplyFunctor<ValueType>;
mult_binary_op mbop;
run_single_scenario_custom_op<Tag, ValueType>(it, mbop);
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{0});
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{1});
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{-2});
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{3});
run_single_scenario<Tag, ValueType>(it, mbop);
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{0});
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{1});
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{-2});
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{3});
run_single_scenario_inplace<Tag, ValueType>(it, mbop);
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{-2});
}
#endif
}

View File

@ -146,7 +146,7 @@ void run_single_scenario(const InfoType& scenario_info) {
resultsA[3] = KE::is_sorted("label", exespace(), view);
const auto allA = std::all_of(resultsA.cbegin(), resultsA.cend(),
[=](bool v) { return v == gold; });
EXPECT_TRUE(allA);
EXPECT_TRUE(allA) << name << ", " << view_tag_to_string(Tag{});
#if !defined KOKKOS_ENABLE_OPENMPTARGET
CustomLessThanComparator<ValueType, ValueType> comp;
@ -159,7 +159,7 @@ void run_single_scenario(const InfoType& scenario_info) {
resultsB[3] = KE::is_sorted("label", exespace(), view, comp);
const auto allB = std::all_of(resultsB.cbegin(), resultsB.cend(),
[=](bool v) { return v == gold; });
EXPECT_TRUE(allB);
EXPECT_TRUE(allB) << name << ", " << view_tag_to_string(Tag{});
#endif
Kokkos::fence();
@ -173,9 +173,6 @@ void run_is_sorted_all_scenarios() {
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
{"large-b", 101513}};
std::cout << "is_sorted: " << view_tag_to_string(Tag{})
<< ", all overloads \n";
for (const auto& it : scenarios) {
run_single_scenario<Tag, ValueType>(it);
}

View File

@ -145,10 +145,10 @@ void run_single_scenario(const InfoType& scenario_info) {
KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view));
auto r3 = KE::is_sorted_until(exespace(), view);
auto r4 = KE::is_sorted_until("label", exespace(), view);
ASSERT_EQ(r1, gold);
ASSERT_EQ(r2, gold);
ASSERT_EQ(r3, gold);
ASSERT_EQ(r4, gold);
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
#if !defined KOKKOS_ENABLE_OPENMPTARGET
CustomLessThanComparator<ValueType, ValueType> comp;
@ -160,10 +160,10 @@ void run_single_scenario(const InfoType& scenario_info) {
auto r8 = KE::is_sorted_until("label", exespace(), view, comp);
#endif
ASSERT_EQ(r1, gold);
ASSERT_EQ(r2, gold);
ASSERT_EQ(r3, gold);
ASSERT_EQ(r4, gold);
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
Kokkos::fence();
}
@ -176,9 +176,6 @@ void run_is_sorted_until_all_scenarios() {
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
{"large-b", 101513}};
std::cout << "is_sorted_until: " << view_tag_to_string(Tag{})
<< ", all overloads \n";
for (const auto& it : scenarios) {
run_single_scenario<Tag, ValueType>(it);
}

View File

@ -48,7 +48,7 @@ struct MyMovableType {
TEST(std_algorithms_mod_ops_test, move) {
MyMovableType a;
using move_t = decltype(std::move(a));
static_assert(std::is_rvalue_reference<move_t>::value, "");
static_assert(std::is_rvalue_reference<move_t>::value);
// move constr
MyMovableType b(std::move(a));
@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove {
void operator()(const int index) const {
typename ViewType::value_type a{11};
using move_t = decltype(std::move(a));
static_assert(std::is_rvalue_reference<move_t>::value, "");
static_assert(std::is_rvalue_reference<move_t>::value);
m_view(index) = std::move(a);
}
@ -89,50 +89,6 @@ TEST(std_algorithms_mod_ops_test, move_within_parfor) {
}
}
// ------------
// swap
// ------------
TEST(std_algorithms_mod_ops_test, swap) {
{
int a = 1;
int b = 2;
KE::swap(a, b);
ASSERT_EQ(a, 2);
ASSERT_EQ(b, 1);
}
{
double a = 3.;
double b = 1.;
KE::swap(a, b);
EXPECT_DOUBLE_EQ(a, 1.);
EXPECT_DOUBLE_EQ(b, 3.);
}
}
template <class ViewType>
struct StdAlgoModSeqOpsTestSwap {
ViewType m_view;
KOKKOS_INLINE_FUNCTION
void operator()(const int index) const {
typename ViewType::value_type newval{11};
KE::swap(m_view(index), newval);
}
StdAlgoModSeqOpsTestSwap(ViewType aIn) : m_view(aIn) {}
};
TEST(std_algorithms_mod_ops_test, swap_within_parfor) {
auto a = create_view<double>(stdalgos::DynamicTag{}, 10, "a");
StdAlgoModSeqOpsTestSwap<decltype(a)> fnc(a);
Kokkos::parallel_for(a.extent(0), fnc);
auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
for (std::size_t i = 0; i < a.extent(0); ++i) {
EXPECT_DOUBLE_EQ(a_h(0), 11.);
}
}
// ------------
// iter_swap
// ------------

View File

@ -110,11 +110,9 @@ void verify_data(const std::string& name, ResultType my_result,
ViewTypeDestFalse view_dest_false, PredType pred) {
using value_type = typename ViewTypeFrom::value_type;
static_assert(
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value,
"");
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value);
static_assert(
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value,
"");
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value);
const std::size_t ext = view_from.extent(0);

View File

@ -166,6 +166,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_copy_if_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();

View File

@ -121,7 +121,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -147,9 +149,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// exclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -168,12 +167,19 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -223,11 +229,16 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
#undef exclusive_scan
}
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
@ -236,16 +247,24 @@ void run_all_scenarios() {
#else
for (int apiId : {0, 1}) {
#endif
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
}
TEST(std_algorithms_exclusive_scan_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamExclusiveScan

View File

@ -139,7 +139,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -165,9 +167,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// inclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -186,12 +185,20 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -251,25 +258,38 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
#undef inclusive_scan
}
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
for (int apiId : {0, 1, 2, 3, 4, 5}) {
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
}
TEST(std_algorithms_inclusive_scan_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamInclusiveScan

View File

@ -212,6 +212,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_remove_copy_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();

View File

@ -168,6 +168,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_remove_copy_if_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();

View File

@ -108,7 +108,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -134,9 +136,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// transform_exclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -156,12 +155,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -200,16 +208,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
#undef transform_exclusive_scan
}
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
for (int apiId : {0, 1}) {
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
@ -219,6 +232,10 @@ TEST(std_algorithms_transform_exclusive_scan_team_test, test) {
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamTransformExclusiveScan

View File

@ -131,7 +131,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -157,9 +159,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// transform_inclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -179,12 +178,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -236,16 +244,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
}
#undef transform_inclusive_scan
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
for (int apiId : {0, 1, 2, 3}) {
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
@ -255,6 +268,10 @@ TEST(std_algorithms_transform_inclusive_scan_team_test, test) {
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamTransformInclusiveScan

View File

@ -186,6 +186,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_unique_copy_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -160,24 +161,15 @@ void verify_data(ViewType1 data_view, // contains data
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error = std::abs(gold_h(i) - test_view_h(i));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
}
EXPECT_LT(error, 1e-10);
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
<< static_cast<double>(test_view_h(i)) << " "
<< static_cast<double>(gold_h(i));
}
}
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
// std::endl;
}
}
@ -205,17 +197,13 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
BinaryOp bop, UnaryOp uop) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "transform_exclusive_scan custom op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", "
// << "init = " << init_value << std::endl;
auto view_dest =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
auto view_from =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext,
"transform_exclusive_scan_view_from");
fill_view(view_from, name);
auto view_dest = create_view<ValueType>(Tag{}, view_ext,
"transform_exclusive_scan_view_dest");
{
fill_zero(view_dest);
auto r = KE::transform_exclusive_scan(
@ -253,6 +241,65 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
Kokkos::fence();
}
// Runs one scenario of KE::transform_exclusive_scan where the destination is
// the same view as the source (in-place operation).
//
// scenario_info: pair-like object; element 0 is the scenario name handed to
//                fill_view, element 1 is the view extent.
// init_value, bop, uop: forwarded unchanged to transform_exclusive_scan and
//                to verify_data.
//
// Each of the four braced sections below exercises one overload:
// iterators without label, iterators with label, views without label,
// views with label.
template <class Tag, class ValueType, class InfoType, class BinaryOp,
class UnaryOp>
void run_single_scenario_inplace(const InfoType& scenario_info,
ValueType init_value, BinaryOp bop,
UnaryOp uop) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// Because the algorithm is called in place, two views are needed:
//  - view1 is filled according to the scenario and is never modified;
//    it is handed to verify_data as the original input data.
//  - view2 is filled according to the scenario and is both the source and
//    the destination of the algorithm. After the call, view2 should hold
//    the result of the exclusive scan.
// NOTE: view2 is re-filled before every call because the previous call
// overwrote its contents.
auto view1 =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view1");
fill_view(view1, name);
auto view2 =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view2");
// overload: execution space + iterators
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan(exespace(), KE::cbegin(view2),
KE::cend(view2), KE::begin(view2),
init_value, bop, uop);
// the returned iterator must be the end of the destination range
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
// overload: label + execution space + iterators
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan(
"label", exespace(), KE::cbegin(view2), KE::cend(view2),
KE::begin(view2), init_value, bop, uop);
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
// overload: execution space + views
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan(exespace(), view2, view2, init_value,
bop, uop);
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
// overload: label + execution space + views
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan("label", exespace(), view2, view2,
init_value, bop, uop);
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
Kokkos::fence();
}
template <class Tag, class ValueType>
void run_all_scenarios() {
const std::map<std::string, std::size_t> scenarios = {
@ -267,6 +314,11 @@ void run_all_scenarios() {
run_single_scenario<Tag, ValueType>(it, ValueType{1}, bop_t(), uop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, bop_t(), uop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{3}, bop_t(), uop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0}, bop_t(),
uop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2}, bop_t(),
uop_t());
}
}

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -172,24 +173,15 @@ void verify_data(ViewType1 data_view, // contains data
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error = std::abs(gold_h(i) - test_view_h(i));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
}
EXPECT_LT(error, 1e-10);
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
<< static_cast<double>(test_view_h(i)) << " "
<< static_cast<double>(gold_h(i));
}
}
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
// std::endl;
}
}
@ -210,30 +202,11 @@ struct SumBinaryFunctor {
std::string value_type_to_string(int) { return "int"; }
std::string value_type_to_string(double) { return "double"; }
template <class Tag, class BopT, class UopT>
void print_scenario_details(const std::string& name, BopT bop, UopT uop) {
(void)bop;
(void)uop;
std::cout << "transform_inclusive_scan: " << name << ", "
<< view_tag_to_string(Tag{}) << std::endl;
}
template <class Tag, class BopT, class UopT, class ValueType>
void print_scenario_details(const std::string& name, BopT bop, UopT uop,
ValueType init_value) {
(void)bop;
(void)uop;
std::cout << "transform_inclusive_scan: " << name << ", "
<< view_tag_to_string(Tag{}) << ", "
<< "init = " << init_value << std::endl;
}
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario(const InfoType& scenario_info,
Args... args /* by value on purpose*/) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// print_scenario_details<Tag>(name, args...);
auto view_dest =
create_view<ValueType>(Tag{}, view_ext, "transform_inclusive_scan");
@ -278,6 +251,63 @@ void run_single_scenario(const InfoType& scenario_info,
Kokkos::fence();
}
// Runs one scenario of KE::transform_inclusive_scan where the destination is
// the same view as the source (in-place operation).
//
// scenario_info: pair-like object; element 0 is the scenario name handed to
//                fill_view, element 1 is the view extent.
// args:          forwarded unchanged to transform_inclusive_scan and to
//                verify_data.
//
// Each of the four braced sections below exercises one overload:
// iterators without label, iterators with label, views without label,
// views with label.
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario_inplace(const InfoType& scenario_info,
Args... args /* by value on purpose*/) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// Because the algorithm is called in place, two views are needed:
//  - view_1 is filled according to the scenario and is never modified;
//    it is handed to verify_data as the original input data.
//  - view_2 is filled according to the scenario and is both the source and
//    the destination of the algorithm. After the call, view_2 should hold
//    the result of the inclusive scan.
// NOTE: view_2 must be re-filled before every call to the algorithm
// because the previous call overwrote its contents.
auto view_1 = create_view<ValueType>(Tag{}, view_ext,
"transform_inclusive_scan_view_1");
fill_view(view_1, name);
auto view_2 = create_view<ValueType>(Tag{}, view_ext,
"transform_inclusive_scan_view_2");
// overload: execution space + iterators
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_2),
KE::cend(view_2), KE::begin(view_2),
args...);
// the returned iterator must be the end of the destination range
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
// overload: label + execution space + iterators
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan("label", exespace(),
KE::cbegin(view_2), KE::cend(view_2),
KE::begin(view_2), args...);
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
// overload: execution space + views
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan(exespace(), view_2, view_2, args...);
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
// overload: label + execution space + views
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan("label", exespace(), view_2, view_2,
args...);
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
Kokkos::fence();
}
template <class Tag, class ValueType>
void run_all_scenarios() {
const std::map<std::string, std::size_t> scenarios = {
@ -294,15 +324,23 @@ void run_all_scenarios() {
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{2});
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-1});
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-2});
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t());
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
ValueType{2});
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
ValueType{-2});
}
}
// The whole test is compiled out when KOKKOS_ENABLE_OPENMPTARGET is defined.
#if !defined KOKKOS_ENABLE_OPENMPTARGET
TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) {
// run every scenario for each combination of view tag (DynamicTag,
// StridedThreeTag) and value type (double, int)
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedThreeTag, double>();
run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedThreeTag, int>();
}
#endif

View File

@ -83,9 +83,6 @@ auto run_min_or_max_test(ViewType view, StdReducersTestEnumOrder enValue) {
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
"test is only enabled for HostSpace");
std::cout << "checking reduction with order: " << order_to_string(enValue)
<< "\n";
using view_value_type = typename ViewType::value_type;
using reducer_type = std::conditional_t<
(flag == 0), Kokkos::MaxFirstLoc<view_value_type, IndexType, ExeSpace>,
@ -132,18 +129,24 @@ TEST(std_algorithms_reducers, max_first_loc) {
const auto pair1 = run_min_or_max_test<0, hostspace, index_type>(
view_h, StdReducersTestEnumOrder::LeftToRight);
ASSERT_EQ(pair1.first, gold_value);
ASSERT_EQ(pair1.second, gold_location);
ASSERT_EQ(pair1.first, gold_value)
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
ASSERT_EQ(pair1.second, gold_location)
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
const auto pair2 = run_min_or_max_test<0, hostspace, index_type>(
view_h, StdReducersTestEnumOrder::RightToLeft);
ASSERT_EQ(pair2.first, gold_value);
ASSERT_EQ(pair2.second, gold_location);
ASSERT_EQ(pair2.first, gold_value)
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
ASSERT_EQ(pair2.second, gold_location)
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
const auto pair3 = run_min_or_max_test<0, hostspace, index_type>(
view_h, StdReducersTestEnumOrder::Random);
ASSERT_EQ(pair3.first, gold_value);
ASSERT_EQ(pair3.second, gold_location);
ASSERT_EQ(pair3.first, gold_value)
<< order_to_string(StdReducersTestEnumOrder::Random);
ASSERT_EQ(pair3.second, gold_location)
<< order_to_string(StdReducersTestEnumOrder::Random);
}
TEST(std_algorithms_reducers, min_first_loc) {
@ -191,9 +194,6 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
"test is only enabled for HostSpace");
std::cout << "checking reduction with order: " << order_to_string(enValue)
<< "\n";
using view_value_type = typename ViewType::value_type;
using reducer_type =
Kokkos::MinMaxFirstLastLoc<view_value_type, IndexType, ExeSpace>;
@ -212,10 +212,10 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
reduction_value_type{view(index), view(index), index, index});
}
ASSERT_EQ(red_result.min_val, gold_values.first);
ASSERT_EQ(red_result.max_val, gold_values.second);
ASSERT_EQ(red_result.min_loc, gold_locs.first);
ASSERT_EQ(red_result.max_loc, gold_locs.second);
ASSERT_EQ(red_result.min_val, gold_values.first) << order_to_string(enValue);
ASSERT_EQ(red_result.max_val, gold_values.second) << order_to_string(enValue);
ASSERT_EQ(red_result.min_loc, gold_locs.first) << order_to_string(enValue);
ASSERT_EQ(red_result.max_loc, gold_locs.second) << order_to_string(enValue);
}
TEST(std_algorithms_reducers, min_max_first_last_loc) {

View File

@ -1 +1,12 @@
# Benchmark sub-directories that are registered for every configuration.
#FIXME_OPENMPTARGET - compiling in debug mode causes ICE.
FOREACH(KOKKOS_BENCH_SUBDIR atomic gather gups launch_latency stream)
  KOKKOS_ADD_BENCHMARK_DIRECTORIES(${KOKKOS_BENCH_SUBDIR})
ENDFOREACH()

# Benchmark sub-directories that are skipped when the OpenMPTarget backend is enabled.
#FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow.
IF(NOT Kokkos_ENABLE_OPENMPTARGET)
  FOREACH(KOKKOS_BENCH_SUBDIR policy_performance bytes_and_flops)
    KOKKOS_ADD_BENCHMARK_DIRECTORIES(${KOKKOS_BENCH_SUBDIR})
  ENDFOREACH()
ENDIF()

View File

@ -0,0 +1,4 @@
# Benchmark executable `atomic`, built from the single source main.cpp.
KOKKOS_ADD_EXECUTABLE(atomic SOURCES main.cpp)

View File

@ -0,0 +1,4 @@
# Benchmark executable `bytes_and_flops`: one translation unit per scalar
# type plus the driver in main.cpp.
KOKKOS_ADD_EXECUTABLE(bytes_and_flops
  SOURCES
    bench_double.cpp
    bench_float.cpp
    bench_int32_t.cpp
    bench_int64_t.cpp
    main.cpp
)

View File

@ -37,22 +37,22 @@ struct RunStride {
};
#define STRIDE 1
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 2
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 4
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 8
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 16
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 32
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
template <class Scalar>

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<double>(int N, int K, int R, int D, int U,
int F, int T, int S, int B, int I);

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<float>(int N, int K, int R, int D, int U, int F,
int T, int S, int B, int I);

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<int32_t>(int N, int K, int R, int D, int U,
int F, int T, int S, int B, int I);

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<int64_t>(int N, int K, int R, int D, int U,
int F, int T, int S, int B, int I);

View File

@ -15,28 +15,28 @@
//@HEADER
#define UNROLL 1
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 2
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 3
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 4
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 5
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 6
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 7
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 8
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
template <class Scalar>

View File

@ -26,7 +26,7 @@ struct Run<Scalar, UNROLL, STRIDE> {
Kokkos::deep_copy(C, Scalar(3.5));
Kokkos::Timer timer;
for (int i = 0; i < I; ++i) {
for (int iter = 0; iter < I; ++iter) {
Kokkos::parallel_for(
"BenchmarkKernel",
Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)),

View File

@ -16,7 +16,7 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_Timer.hpp>
#include <bench.hpp>
#include "bench.hpp"
#include <cstdlib>
extern template void run_stride_unroll<float>(int, int, int, int, int, int, int,
@ -86,7 +86,7 @@ int main(int argc, char* argv[]) {
printf("D must be one of 1,2,4,8,16,32\n");
return 0;
}
if ((P < 1) && (P > 2)) {
if ((P < 1) || (P > 4)) {
printf("P must be one of 1,2,3,4\n");
return 0;
}

View File

@ -0,0 +1,4 @@
# Benchmark executable `gather`, built from the single source main.cpp.
KOKKOS_ADD_EXECUTABLE(gather SOURCES main.cpp)

View File

@ -20,28 +20,28 @@ struct RunGather {
};
#define UNROLL 1
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 2
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 3
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 4
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 5
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 6
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 7
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 8
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
template <class Scalar>

View File

@ -138,7 +138,7 @@ struct RunGather<Scalar, UNROLL> {
printf(
"SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: "
"%lf GGather/s: %lf\n",
sizeof(Scalar) / 4, N, K, D, R, UNROLL, F, seconds,
static_cast<int>(sizeof(Scalar) / 4), N, K, D, R, UNROLL, F, seconds,
1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds,
1.e-9 * gather_ops / seconds);
}

View File

@ -16,7 +16,7 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_Timer.hpp>
#include <gather.hpp>
#include "gather.hpp"
#include <cstdlib>
int main(int argc, char* argv[]) {

View File

@ -0,0 +1,4 @@
# Benchmark executable `launch_latency`, built from launch_latency.cpp.
KOKKOS_ADD_EXECUTABLE(launch_latency SOURCES launch_latency.cpp)

View File

@ -0,0 +1,283 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
/*! \file launch_latency.cpp
Tests of parallel_for and parallel_reduce latency for different
circumstances.
Three launch kinds are tested: parallel_for, parallel_reduce into scalar,
and parallel_reduce into view
N controls how large the parallel loop is
V controls how large the functor is
M controls across how many launches the latency is averaged
K controls how large the nested loop is (no larger than V)
For each launch kind,
1. Avg functor dispatch latency: (time to do M launches) / M
2. Avg functor completion throughput: (M launches + sync) / M
3. Avg functor completion latency: (M (launch + sync)) / M
*/
#include <Kokkos_Core.hpp>

#include <cstdio>
#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
template <int V>
struct TestFunctor {
double values[V];
Kokkos::View<double*> a;
int K;
TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
}
};
template <int V>
struct TestRFunctor {
double values[V];
Kokkos::View<double*> a;
int K;
TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, double& lsum) const {
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
lsum += a(i);
}
};
// Selects which launch kinds are benchmarked. Every flag defaults to true,
// i.e. all three benchmarks run unless a flag is cleared.
struct Opts {
  bool par_for{true};          // time parallel_for launches
  bool par_reduce{true};       // time parallel_reduce into a scalar
  bool par_reduce_view{true};  // time parallel_reduce into a view
};
template <int V>
void run(int N, int M, int K, const Opts& opts) {
std::string l_no_fence, l_fence, l_red_no_fence, l_red_fence,
l_red_view_no_fence, l_red_view_fence;
{
std::ostringstream ostream;
ostream << "RunNoFence_" << N << "_" << K << std::endl;
l_no_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunFence_" << N << "_" << K << std::endl;
l_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceNoFence_" << N << "_" << K << std::endl;
l_red_no_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceFence_" << N << "_" << K << std::endl;
l_red_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceViewNoFence_" << N << "_" << K << std::endl;
l_red_view_no_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceViewFence_" << N << "_" << K << std::endl;
l_red_view_fence = ostream.str();
}
double result;
Kokkos::View<double*> a("A", N);
Kokkos::View<double> v_result("result");
TestFunctor<V> f(a, K);
TestRFunctor<V> rf(a, K);
Kokkos::Timer timer;
// initialize to an obviously wrong value
double time_no_fence = -1; // launch loop
double time_no_fence_fenced = -1; // launch loop then fence
double time_fence = -1; // launch&fence loop
double time_red_no_fence = -1;
double time_red_no_fence_fenced = -1;
double time_red_fence = -1;
double time_red_view_no_fence = -1;
double time_red_view_no_fence_fenced = -1;
double time_red_view_fence = -1;
if (opts.par_for) {
// warmup
for (int i = 0; i < 4; ++i) {
Kokkos::parallel_for(l_no_fence, N, f);
}
Kokkos::fence();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_for(l_no_fence, N, f);
}
time_no_fence = timer.seconds();
Kokkos::fence();
time_no_fence_fenced = timer.seconds();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_for(l_fence, N, f);
Kokkos::fence();
}
time_fence = timer.seconds();
}
if (opts.par_reduce) {
// warmup
for (int i = 0; i < 4; ++i) {
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
}
Kokkos::fence();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
}
time_red_no_fence = timer.seconds();
Kokkos::fence();
time_red_no_fence_fenced = timer.seconds();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_fence, N, rf, result);
Kokkos::fence();
}
time_red_fence = timer.seconds();
Kokkos::fence();
}
if (opts.par_reduce_view) {
// warmup
for (int i = 0; i < 4; ++i) {
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
}
Kokkos::fence();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
}
time_red_view_no_fence = timer.seconds();
Kokkos::fence();
time_red_view_no_fence_fenced = timer.seconds();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_view_fence, N, rf, v_result);
Kokkos::fence();
}
time_red_view_fence = timer.seconds();
Kokkos::fence();
timer.reset();
}
const double x = 1.e6 / M;
printf("%i %i %i %i", N, V, K, M);
if (opts.par_for) {
printf(" parallel_for: %lf %lf ( %lf )", x * time_no_fence, x * time_fence,
x * time_no_fence_fenced);
}
if (opts.par_reduce) {
printf(" parallel_reduce: %lf %lf ( %lf )", x * time_red_no_fence,
x * time_red_fence, x * time_red_no_fence_fenced);
}
if (opts.par_reduce_view) {
printf(" parallel_reduce(view): %lf %lf ( %lf )",
x * time_red_view_no_fence, x * time_red_view_fence,
x * time_red_view_no_fence_fenced);
}
printf("\n");
}
// Entry point of the launch latency benchmark.
//
// Prints the usage text, parses the command line and runs the benchmark for
// several functor sizes.
//
// Command line (after Kokkos::initialize has processed argc/argv):
//   positional arguments, in order: N M K (each optional, defaults
//   10000 / 20 / 1)
//   options: --no-parallel-for, --no-parallel-reduce,
//            --no-parallel-reduce-view
// Options and positional arguments may be interleaved; positional arguments
// are assigned in the order in which they appear.
//
// Throws std::runtime_error on a fourth positional argument, on an unknown
// option, and when M is smaller than 1.
int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    int N = 10000;
    int M = 20;
    int K = 1;

    Opts opts;

    printf("==========================\n");
    printf("Kokkos Launch Latency Test\n");
    printf("==========================\n");
    printf("\n");
    printf("Usage: %s ARGUMENTS [OPTIONS...]\n\n", argv[0]);
    printf("Arguments: N M K\n");
    printf(" N: loop length\n");
    printf(" M: how many kernels to dispatch\n");
    printf(
        " K: nested loop length (capped by size of functor member array\n\n");
    printf("Options:\n");
    printf(" --no-parallel-for: skip parallel_for benchmark\n");
    printf(" --no-parallel-reduce: skip parallel_reduce benchmark\n");
    printf(
        " --no-parallel-reduce-view: skip parallel_reduce into view "
        "benchmark\n");
    printf("\n\n");
    printf(" Output V is the size of the functor member array\n");
    printf("\n\n");

    // number of positional arguments consumed so far; decides whether the
    // next one is N, M or K independently of where options are placed
    int num_positional = 0;

    for (int i = 1; i < argc; ++i) {
      const std::string_view arg(argv[i]);

      // an option is anything that starts with "--"; everything else
      // (including negative numbers such as "-5") is positional
      const bool is_option =
          arg.size() >= 2 && arg[0] == '-' && arg[1] == '-';

      if (!is_option) {
        // argv entries are null-terminated, so arg.data() is a valid C string
        switch (num_positional++) {
          case 0: N = atoi(arg.data()); break;
          case 1: M = atoi(arg.data()); break;
          case 2: K = atoi(arg.data()); break;
          default: throw std::runtime_error("unexpected argument!");
        }
      } else if (arg == "--no-parallel-for") {
        opts.par_for = false;
      } else if (arg == "--no-parallel-reduce") {
        opts.par_reduce = false;
      } else if (arg == "--no-parallel-reduce-view") {
        opts.par_reduce_view = false;
      } else {
        std::stringstream ss;
        ss << "unexpected argument \"" << arg << "\" at position " << i;
        throw std::runtime_error(ss.str());
      }
    }

    // every reported time is divided by M, so M must be positive
    if (M < 1) {
      throw std::runtime_error("M must be at least 1");
    }

    printf("N V K M time_no_fence time_fence (time_no_fence_fenced)\n");

    /* A backend may have different launch strategies for functors of different
     * sizes: test a variety of functor sizes.
     * K is capped at V for each run so the nested loop never reads past the
     * functor member array. */
    run<1>(N, M, K <= 1 ? K : 1, opts);
    run<16>(N, M, K <= 16 ? K : 16, opts);
    run<200>(N, M, K <= 200 ? K : 200, opts);
    run<3000>(N, M, K <= 3000 ? K : 3000, opts);
    run<30000>(N, M, K <= 30000 ? K : 30000, opts);
  }
  Kokkos::finalize();
}

View File

@ -0,0 +1,4 @@
# Benchmark executable `policy_performance`, built from the single source main.cpp.
KOKKOS_ADD_EXECUTABLE(policy_performance SOURCES main.cpp)

View File

@ -106,8 +106,9 @@ int main(int argc, char* argv[]) {
Kokkos::parallel_reduce(
"parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1),
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team,
double& lval) { lval += 1; },
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type&, double& lval) {
lval += 1;
},
result);
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;

View File

@ -21,13 +21,13 @@ struct ParallelScanFunctor {
using value_type = double;
ViewType v;
ParallelScanFunctor(const ViewType& v_) : v(v_) {}
explicit ParallelScanFunctor(const ViewType& v_) : v(v_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int idx, value_type& val, const bool& final) const {
void operator()(const int idx, value_type& val, const bool& is_final) const {
// inclusive scan
val += v(idx);
if (final) {
if (is_final) {
v(idx) = val;
}
}
@ -109,7 +109,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
vector_result = 0.0;
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange(team, vector_range),
[&](const int vi, double& vval) { vval += 1; },
[&](const int, double& vval) { vval += 1; },
vector_result);
}
v2(idx, t) = vector_result;
@ -128,7 +128,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
team_result = 0.0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, thread_range),
[&](const int t, double& lval) { lval += 1; }, team_result);
[&](const int, double& lval) { lval += 1; }, team_result);
}
v1(idx) = team_result;
// prevent compiler optimizing loop away
@ -170,13 +170,13 @@ void test_policy(int team_range, int thread_range, int vector_range,
for (int tr = 0; tr < thread_repeat; ++tr) {
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, thread_range),
[&](const int t, double& lval) {
[&](const int, double& lval) {
double vector_result = 0.0;
for (int vr = 0; vr < inner_repeat; ++vr) {
vector_result = 0.0;
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange(team, vector_range),
[&](const int vi, double& vval) { vval += 1; },
[&](const int, double& vval) { vval += 1; },
vector_result);
lval += vector_result;
}

View File

@ -0,0 +1,4 @@
# Benchmark executable `stream`, built from stream-kokkos.cpp.
KOKKOS_ADD_EXECUTABLE(stream SOURCES stream-kokkos.cpp)

View File

@ -229,7 +229,7 @@ do
fi
;;
#Handle known nvcc args
--dryrun|--verbose|--keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-G|-lineinfo|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
--dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args

View File

@ -39,10 +39,12 @@ IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS)
GLOBAL
CHECK_CUDA_COMPILES)
ELSEIF(@Kokkos_ENABLE_CUDA@ AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
ELSEIF(@Kokkos_ENABLE_CUDA@
AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA
AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
#
# if CUDA was enabled, separable compilation was not specified, and current compiler
# cannot compile CUDA, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and
# if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not
# specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and
# kokkos_launch_compiler will re-direct to the compiler used to compile CUDA code during installation.
# kokkos_launch_compiler will re-direct if ${CMAKE_CXX_COMPILER} and -DKOKKOS_DEPENDENCE is present,
# otherwise, the original command will be executed

View File

@ -23,8 +23,6 @@
#cmakedefine KOKKOS_ENABLE_CUDA
#cmakedefine KOKKOS_ENABLE_HIP
#cmakedefine KOKKOS_ENABLE_HPX
#cmakedefine KOKKOS_ENABLE_MEMKIND
#cmakedefine KOKKOS_ENABLE_LIBRT
#cmakedefine KOKKOS_ENABLE_SYCL
#cmakedefine KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED
@ -32,6 +30,7 @@
#cmakedefine KOKKOS_ENABLE_CXX17
#cmakedefine KOKKOS_ENABLE_CXX20
#cmakedefine KOKKOS_ENABLE_CXX23
#cmakedefine KOKKOS_ENABLE_CXX26
#cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_CUDA_UVM
@ -45,7 +44,6 @@
#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
#cmakedefine KOKKOS_ENABLE_TUNING
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_3
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_4
#cmakedefine KOKKOS_ENABLE_DEPRECATION_WARNINGS
#cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS
@ -53,17 +51,15 @@
#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated
#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION
#cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN
#cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS
/* TPL Settings */
#cmakedefine KOKKOS_ENABLE_HWLOC
#cmakedefine KOKKOS_USE_LIBRT
#cmakedefine KOKKOS_ENABLE_HBWSPACE
#cmakedefine KOKKOS_ENABLE_LIBDL
#cmakedefine KOKKOS_ENABLE_LIBQUADMATH
#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
#cmakedefine KOKKOS_ENABLE_ONEDPL
#cmakedefine KOKKOS_ENABLE_ROCTHRUST
#cmakedefine KOKKOS_ARCH_SSE42
#cmakedefine KOKKOS_ARCH_ARMV80
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
#cmakedefine KOKKOS_ARCH_ARMV81
@ -78,6 +74,7 @@
#cmakedefine KOKKOS_ARCH_POWER7
#cmakedefine KOKKOS_ARCH_POWER8
#cmakedefine KOKKOS_ARCH_POWER9
#cmakedefine KOKKOS_ARCH_RISCV_SG2042
#cmakedefine KOKKOS_ARCH_INTEL_GEN
#cmakedefine KOKKOS_ARCH_INTEL_DG1
#cmakedefine KOKKOS_ARCH_INTEL_GEN9

View File

@ -7,7 +7,8 @@ IF (NOT CUDAToolkit_ROOT)
ENDIF()
ENDIF()
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
# FIXME CMake 3.28.4 creates more targets than we export
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0" AND CMAKE_VERSION VERSION_LESS "3.28.4")
find_package(CUDAToolkit)
ELSE()
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)

View File

@ -1 +0,0 @@
KOKKOS_FIND_IMPORTED(LIBRT HEADER time.h LIBRARY rt)

View File

@ -1 +0,0 @@
KOKKOS_FIND_IMPORTED(MEMKIND HEADER memkind.h LIBRARY memkind)

View File

@ -43,4 +43,7 @@ ELSE()
COMPILE_DEFINITIONS PSTL_USE_PARALLEL_POLICIES=0 _GLIBCXX_USE_TBB_PAR_BACKEND=0
)
ENDIF()
# Export oneDPL as a Kokkos dependency
KOKKOS_EXPORT_CMAKE_TPL(oneDPL)
ENDIF()

View File

@ -0,0 +1,15 @@
# ROCm 5.6 and earlier set AMDGPU_TARGETS and GPU_TARGETS to all the supported
# architectures. Therefore, we end up compiling Kokkos for all the supported
# architectures. Starting with ROCm 5.7, AMDGPU_TARGETS and GPU_TARGETS are
# empty, and it is the user's job to set the variables. Since we are injecting
# the architecture flag ourselves, we can leave the variables empty. To
# replicate the behavior of ROCm 5.7 and later for earlier versions of ROCm, we
# set AMDGPU_TARGETS and GPU_TARGETS to empty and store the values in the cache.
# If the values are not cached, FIND_PACKAGE(rocthrust) will overwrite them.
# NOTE: both cache entries must be created before the FIND_PACKAGE call below.
SET(AMDGPU_TARGETS "" CACHE STRING "AMD GPU targets to compile for")
SET(GPU_TARGETS "" CACHE STRING "GPU targets to compile for")
FIND_PACKAGE(rocthrust REQUIRED)
KOKKOS_CREATE_IMPORTED_TPL(ROCTHRUST INTERFACE LINK_LIBRARIES roc::rocthrust)
# Export ROCTHRUST as a Kokkos dependency
KOKKOS_EXPORT_CMAKE_TPL(rocthrust)

View File

@ -49,7 +49,6 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU")
DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support")
DECLARE_AND_CHECK_HOST_ARCH(WSM "Intel Westmere CPU")
DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs")
DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs")
DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs")
@ -60,13 +59,12 @@ DECLARE_AND_CHECK_HOST_ARCH(SKX "Intel Skylake Xeon Server CPUs (A
DECLARE_AND_CHECK_HOST_ARCH(KNC "Intel Knights Corner Xeon Phi")
DECLARE_AND_CHECK_HOST_ARCH(KNL "Intel Knights Landing Xeon Phi")
DECLARE_AND_CHECK_HOST_ARCH(SPR "Intel Sapphire Rapids Xeon Server CPUs (AVX512)")
DECLARE_AND_CHECK_HOST_ARCH(BGQ "IBM Blue Gene Q")
DECLARE_AND_CHECK_HOST_ARCH(POWER7 "IBM POWER7 CPUs")
DECLARE_AND_CHECK_HOST_ARCH(POWER8 "IBM POWER8 CPUs")
DECLARE_AND_CHECK_HOST_ARCH(POWER9 "IBM POWER9 CPUs")
DECLARE_AND_CHECK_HOST_ARCH(ZEN "AMD Zen architecture")
DECLARE_AND_CHECK_HOST_ARCH(ZEN2 "AMD Zen2 architecture")
DECLARE_AND_CHECK_HOST_ARCH(ZEN3 "AMD Zen3 architecture")
DECLARE_AND_CHECK_HOST_ARCH(RISCV_SG2042 "SG2042 (RISC-V) CPUs")
IF(Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR Kokkos_ENABLE_SYCL)
SET(KOKKOS_SHOW_CUDA_ARCHS ON)
@ -191,9 +189,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
ELSEIF(CUDAToolkit_BIN_DIR)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..)
ENDIF()
IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE)
ENDIF()
ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
SET(CUDA_ARCH_FLAG "-gpu")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda)
@ -342,18 +337,6 @@ IF (KOKKOS_ARCH_ZEN3)
SET(KOKKOS_ARCH_AVX2 ON)
ENDIF()
IF (KOKKOS_ARCH_WSM)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray NO-VALUE-SPECIFIED
Intel -xSSE4.2
MSVC NO-VALUE-SPECIFIED
NVHPC -tp=px
DEFAULT -msse4.2
)
SET(KOKKOS_ARCH_SSE42 ON)
ENDIF()
IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
SET(KOKKOS_ARCH_AVX ON)
COMPILER_SPECIFIC_FLAGS(
@ -378,6 +361,23 @@ IF (KOKKOS_ARCH_HSW)
)
ENDIF()
# SG2042 (RISC-V) host architecture: reject unsupported compilers, then add the
# architecture flag for the host compiler.
IF (KOKKOS_ARCH_RISCV_SG2042)
  # CMake evaluates NOT before AND/OR, so "NOT (A) OR (B)" means "(NOT A) OR B"
  # and would report an error for every Clang build, including Clang >= 14.
  # The two accepted-compiler tests are therefore grouped and negated as a whole.
  IF(NOT (
      (KOKKOS_CXX_COMPILER_ID STREQUAL GNU
        AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
      OR
      (KOKKOS_CXX_COMPILER_ID STREQUAL Clang
        AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14)
    ))
    MESSAGE(SEND_ERROR "Only gcc >= 12 and clang >= 14 support RISC-V.")
  ENDIF()
  COMPILER_SPECIFIC_FLAGS(
    COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
    DEFAULT -march=rv64imafdcv
  )
ENDIF()
IF (KOKKOS_ARCH_BDW)
SET(KOKKOS_ARCH_AVX2 ON)
COMPILER_SPECIFIC_FLAGS(
@ -571,6 +571,11 @@ IF (KOKKOS_ENABLE_HIP)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fgpu-rdc
)
IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
COMPILER_SPECIFIC_LINK_OPTIONS(
DEFAULT --hip-link
)
ENDIF()
ELSE()
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fno-gpu-rdc
@ -588,32 +593,44 @@ IF (KOKKOS_ENABLE_SYCL)
ENDIF()
# Check support for device_global variables
# FIXME_SYCL Once the feature test macro SYCL_EXT_ONEAPI_DEVICE_GLOBAL is
# available, use that instead.
IF(KOKKOS_ENABLE_SYCL AND NOT BUILD_SHARED_LIBS)
INCLUDE(CheckCXXSourceCompiles)
# FIXME_SYCL If SYCL_EXT_ONEAPI_DEVICE_GLOBAL is defined, we can use device
# global variables with shared libraries using the "non-separable compilation"
# implementation. Otherwise, the feature is not supported when building shared
# libraries. Thus, we don't even check for support if shared libraries are
# requested and SYCL_EXT_ONEAPI_DEVICE_GLOBAL is not defined.
IF(KOKKOS_ENABLE_SYCL)
STRING(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${KOKKOS_COMPILE_OPTIONS}")
CHECK_CXX_SOURCE_COMPILES("
#include <sycl/sycl.hpp>
using namespace sycl::ext::oneapi::experimental;
using namespace sycl;
INCLUDE(CheckCXXSymbolExists)
CHECK_CXX_SYMBOL_EXISTS(SYCL_EXT_ONEAPI_DEVICE_GLOBAL "sycl/sycl.hpp" KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_DEVICE_GLOBAL)
IF (KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_DEVICE_GLOBAL)
SET(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED ON)
# Use the non-separable compilation implementation to support shared libraries as well.
COMPILER_SPECIFIC_FLAGS(DEFAULT -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED)
ELSEIF(NOT BUILD_SHARED_LIBS)
INCLUDE(CheckCXXSourceCompiles)
CHECK_CXX_SOURCE_COMPILES("
#include <sycl/sycl.hpp>
using namespace sycl::ext::oneapi::experimental;
using namespace sycl;
SYCL_EXTERNAL device_global<int, decltype(properties(device_image_scope))> Foo;
SYCL_EXTERNAL device_global<int, decltype(properties(device_image_scope))> Foo;
void bar(queue q) {
q.single_task([=] {
Foo = 42;
});
}
void bar(queue q) {
q.single_task([=] {
Foo = 42;
});
}
int main(){ return 0; }
"
KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
int main(){ return 0; }
"
KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
IF(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-device-code-split=off -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED
)
IF(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
# Only the separable compilation implementation is supported.
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-device-code-split=off -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED
)
ENDIF()
ENDIF()
ENDIF()
@ -767,30 +784,35 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN9)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen9" -D__STRICT_ANSI__
ELSE()
COMPILER_SPECIFIC_OPTIONS(
IntelLLVM -fopenmp-targets=spir64_gen -D__STRICT_ANSI__
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN11)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen11" -D__STRICT_ANSI__
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN12LP)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen12lp" -D__STRICT_ANSI__
)
ELSEIF(KOKKOS_ARCH_INTEL_DG1)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device dg1" -D__STRICT_ANSI__
)
ELSEIF(KOKKOS_ARCH_INTEL_XEHP)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.50.4" -D__STRICT_ANSI__
)
ELSEIF(KOKKOS_ARCH_INTEL_PVC)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.60.7" -D__STRICT_ANSI__
IF(KOKKOS_ARCH_INTEL_GEN9)
COMPILER_SPECIFIC_LINK_OPTIONS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen9"
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN11)
COMPILER_SPECIFIC_LINK_OPTIONS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen11"
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN12LP)
COMPILER_SPECIFIC_LINK_OPTIONS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen12lp"
)
ELSEIF(KOKKOS_ARCH_INTEL_DG1)
COMPILER_SPECIFIC_LINK_OPTIONS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device dg1"
)
ELSEIF(KOKKOS_ARCH_INTEL_XEHP)
COMPILER_SPECIFIC_LINK_OPTIONS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.50.4"
)
ELSEIF(KOKKOS_ARCH_INTEL_PVC)
COMPILER_SPECIFIC_LINK_OPTIONS(
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.60.7"
)
ENDIF()
ENDIF()
ENDIF()
@ -1130,3 +1152,14 @@ MESSAGE(STATUS "Architectures:")
FOREACH(Arch ${KOKKOS_ENABLED_ARCH_LIST})
MESSAGE(STATUS " ${Arch}")
ENDFOREACH()
# Configuration checks for Kokkos_ENABLE_ATOMICS_BYPASS: the option is rejected
# as soon as a host parallel or a device backend is configured.
IF(KOKKOS_ENABLE_ATOMICS_BYPASS)
  # "NoTypeDefined" is the value these variables are compared against to detect
  # that no host parallel / device backend was selected.
  IF(NOT _HOST_PARALLEL STREQUAL "NoTypeDefined" OR NOT _DEVICE_PARALLEL STREQUAL "NoTypeDefined")
    MESSAGE(FATAL_ERROR "Disabling atomics (via -DKokkos_ENABLE_ATOMICS_BYPASS=ON) is not allowed if a host parallel or a device backend is enabled!")
  ENDIF()
  # With no parallel backend at all, the Serial backend is expected to be on.
  IF(NOT KOKKOS_ENABLE_SERIAL)
    MESSAGE(FATAL_ERROR "Implementation bug") # safeguard
  ENDIF()
  MESSAGE(STATUS "Atomics: **DISABLED**")
ENDIF()

View File

@ -152,6 +152,7 @@ ENDIF()
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) 8.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) 10.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) 15.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 8.2.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 19.0.5 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) 2021.1.1 or higher")
@ -210,6 +211,10 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ENDIF()
IF(NOT DEFINED KOKKOS_CXX_HOST_COMPILER_ID)

View File

@ -48,7 +48,6 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
# resolved but we keep the option around a bit longer to be safe.
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_3 OFF "Whether code deprecated in major release 3 is available" )
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
@ -74,6 +73,7 @@ KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple ke
# This option will go away eventually, but allows fallback to old implementation when needed.
KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation")
KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases")
KOKKOS_ENABLE_OPTION(IMPL_MDSPAN OFF "Whether to enable experimental mdspan support")
KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan")

View File

@ -7,6 +7,7 @@ KOKKOS_OPTION(CXX_STANDARD "" STRING "[[DEPRECATED - USE CMAKE_CXX_STANDARD INST
SET(KOKKOS_ENABLE_CXX17 OFF)
SET(KOKKOS_ENABLE_CXX20 OFF)
SET(KOKKOS_ENABLE_CXX23 OFF)
SET(KOKKOS_ENABLE_CXX26 OFF)
IF (KOKKOS_CXX_STANDARD)
MESSAGE(FATAL_ERROR "Setting the variable Kokkos_CXX_STANDARD in configuration is deprecated - set CMAKE_CXX_STANDARD directly instead")
ENDIF()

View File

@ -74,6 +74,10 @@ ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "23")
kokkos_set_cxx_standard_feature(23)
SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "2B")
SET(KOKKOS_ENABLE_CXX23 ON)
ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "26")
kokkos_set_cxx_standard_feature(26)
SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "2C")
SET(KOKKOS_ENABLE_CXX26 ON)
ELSE()
MESSAGE(FATAL_ERROR "Kokkos requires C++17 or newer but requested ${KOKKOS_CXX_STANDARD}!")
ENDIF()

View File

@ -32,19 +32,21 @@ FUNCTION(KOKKOS_TPL_OPTION PKG DEFAULT)
ENDFUNCTION()
KOKKOS_TPL_OPTION(HWLOC Off TRIBITS HWLOC)
KOKKOS_TPL_OPTION(MEMKIND Off)
IF(KOKKOS_ENABLE_MEMKIND)
SET(KOKKOS_ENABLE_HBWSPACE ON)
ENDIF()
KOKKOS_TPL_OPTION(CUDA ${Kokkos_ENABLE_CUDA} TRIBITS CUDA)
KOKKOS_TPL_OPTION(LIBRT Off)
IF(KOKKOS_ENABLE_HIP AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC AND NOT
KOKKOS_HAS_TRILINOS)
SET(ROCM_DEFAULT ON)
ELSE()
SET(ROCM_DEFAULT OFF)
ENDIF()
IF(KOKKOS_ENABLE_HIP AND NOT KOKKOS_HAS_TRILINOS)
SET(ROCTHRUST_DEFAULT ON)
ELSE()
SET(ROCTHRUST_DEFAULT OFF)
ENDIF()
KOKKOS_TPL_OPTION(ROCM ${ROCM_DEFAULT})
KOKKOS_TPL_OPTION(ROCTHRUST ${ROCTHRUST_DEFAULT})
IF(KOKKOS_ENABLE_SYCL AND NOT KOKKOS_HAS_TRILINOS)
SET(ONEDPL_DEFAULT ON)
ELSE()
@ -77,21 +79,18 @@ KOKKOS_TPL_OPTION(LIBQUADMATH ${LIBQUADMATH_DEFAULT} TRIBITS quadmath)
#Make sure we use our local FindKokkosCuda.cmake
KOKKOS_IMPORT_TPL(HPX INTERFACE)
IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
KOKKOS_IMPORT_TPL(CUDA INTERFACE)
ENDIF()
KOKKOS_IMPORT_TPL(CUDA INTERFACE)
KOKKOS_IMPORT_TPL(HWLOC)
KOKKOS_IMPORT_TPL(LIBRT)
KOKKOS_IMPORT_TPL(LIBDL)
KOKKOS_IMPORT_TPL(MEMKIND)
IF (NOT WIN32)
KOKKOS_IMPORT_TPL(THREADS INTERFACE)
ENDIF()
IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
KOKKOS_IMPORT_TPL(ROCM INTERFACE)
KOKKOS_IMPORT_TPL(ONEDPL INTERFACE)
ENDIF()
KOKKOS_IMPORT_TPL(ONEDPL INTERFACE)
KOKKOS_IMPORT_TPL(LIBQUADMATH)
KOKKOS_IMPORT_TPL(ROCTHRUST)
IF (Kokkos_ENABLE_DESUL_ATOMICS_EXTERNAL)
find_package(desul REQUIRED COMPONENTS atomics)
@ -119,7 +118,3 @@ STRING(REPLACE ";" "\n" KOKKOS_TPL_EXPORT_TEMP "${KOKKOS_TPL_EXPORTS}")
#Convert to a regular variable
UNSET(KOKKOS_TPL_EXPORTS CACHE)
SET(KOKKOS_TPL_EXPORTS ${KOKKOS_TPL_EXPORT_TEMP})
IF (KOKKOS_ENABLE_MEMKIND)
SET(KOKKOS_ENABLE_HBWSPACE)
LIST(APPEND KOKKOS_MEMSPACE_LIST HBWSpace)
ENDIF()

View File

@ -237,18 +237,10 @@ ENDMACRO()
## KOKKOS_DECLARE is the declaration set
## KOKKOS_POST_INCLUDE is included at the end of Kokkos_Core.hpp
MACRO(KOKKOS_CONFIGURE_CORE)
SET(FWD_BACKEND_LIST)
FOREACH(MEMSPACE ${KOKKOS_MEMSPACE_LIST})
LIST(APPEND FWD_BACKEND_LIST ${MEMSPACE})
ENDFOREACH()
FOREACH(BACKEND_ ${KOKKOS_ENABLED_DEVICES})
LIST(APPEND FWD_BACKEND_LIST ${BACKEND_})
ENDFOREACH()
MESSAGE(STATUS "Kokkos Devices: ${KOKKOS_ENABLED_DEVICES}, Kokkos Backends: ${FWD_BACKEND_LIST}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${FWD_BACKEND_LIST}")
MESSAGE(STATUS "Kokkos Backends: ${KOKKOS_ENABLED_DEVICES}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${FWD_BACKEND_LIST}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_PostInclude.hpp "KOKKOS_POST_INCLUDE" "Kokkos_Post_Include" "${KOKKOS_BACKEND_POST_INCLUDE_LIST}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}")
SET(_DEFAULT_HOST_MEMSPACE "::Kokkos::HostSpace")
KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space")
KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space")
@ -309,7 +301,6 @@ MACRO(KOKKOS_INSTALL_ADDITIONAL_FILES)
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_FwdBackend.hpp"
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_SetupBackend.hpp"
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_DeclareBackend.hpp"
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_PostInclude.hpp"
DESTINATION ${KOKKOS_HEADER_DIR})
ENDMACRO()

View File

@ -1,773 +0,0 @@
#!/bin/bash -e
#
# Global config
#
set -o pipefail
# Determine current machine.
MACHINE=""
HOSTNAME=$(hostname)
PROCESSOR=`uname -p`
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
MACHINE=white
module load git
fi
if [[ "$HOSTNAME" =~ .*bowman.* ]]; then
MACHINE=bowman
module load git
fi
if [[ "$HOSTNAME" == n* ]]; then # Warning: very generic name
if [[ "$PROCESSOR" = "aarch64" ]]; then
MACHINE=sullivan
module load git
fi
fi
if [[ "$HOSTNAME" == node* ]]; then # Warning: very generic name
if [[ "$MACHINE" = "" ]]; then
MACHINE=shepard
module load git
fi
fi
if [[ "$HOSTNAME" == apollo\.* ]]; then
MACHINE=apollo
module load git
fi
if [[ "$HOSTNAME" == sullivan ]]; then
MACHINE=sullivan
module load git
fi
if [[ "$HOSTNAME" == mayer\.* ]]; then
MACHINE=mayer
# module load git
fi
if [[ "$HOSTNAME" == cn* ]]; then # Warning: very generic name
MACHINE=mayer
fi
if [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
if [[ "$MACHINE" = "" ]]; then
MACHINE=sems
module load sems-git
fi
fi
if [[ "$MACHINE" = "" ]]; then
echo "Unrecognized machine" >&2
exit 1
fi
echo "Running on machine: $MACHINE"
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
GCC_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
IBM_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
#CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
PGI_WARNING_FLAGS=""
# Default. Machine specific can override.
DEBUG=False
ARGS=""
CUSTOM_BUILD_LIST=""
DRYRUN=False
BUILD_ONLY=False
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1
TEST_SCRIPT=False
SKIP_HWLOC=False
SPOT_CHECK=False
PRINT_HELP=False
OPT_FLAG=""
CXX_FLAGS_EXTRA=""
LD_FLAGS_EXTRA=""
KOKKOS_OPTIONS=""
#
# Handle arguments.
#
while [[ $# > 0 ]]
do
key="$1"
case $key in
--kokkos-path*)
KOKKOS_PATH="${key#*=}"
;;
--build-list*)
CUSTOM_BUILD_LIST="${key#*=}"
;;
--debug*)
DEBUG=True
;;
--build-only*)
BUILD_ONLY=True
;;
--test-script*)
TEST_SCRIPT=True
;;
--skip-hwloc*)
SKIP_HWLOC=True
;;
--num*)
NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
;;
--dry-run*)
DRYRUN=True
;;
--spot-check*)
SPOT_CHECK=True
;;
--arch*)
ARCH_FLAG="--arch=${key#*=}"
;;
--opt-flag*)
OPT_FLAG="${key#*=}"
;;
--with-cuda-options*)
KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
;;
--with-options*)
KOKKOS_OPTIONS="--with-options=enable_large_mem_tests,${key#*=}"
;;
--cxxflags-extra*)
CXX_FLAGS_EXTRA="${key#*=}"
;;
--ldflags-extra*)
LD_FLAGS_EXTRA="${key#*=}"
;;
--help*)
PRINT_HELP=True
;;
*)
# args, just append
ARGS="$ARGS $1"
;;
esac
shift
done
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
# Set kokkos path.
if [ -z "$KOKKOS_PATH" ]; then
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
# Ensure KOKKOS_PATH is abs path.
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
fi
UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null`
if ! [ -z "$UNCOMMITTED" ]; then
echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :"
echo "$UNCOMMITTED"
echo ""
fi
GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline`
echo "Repository Status: " ${GITSTATUS}
echo ""
echo ""
#
# Machine specific config.
#
if [ "$MACHINE" = "sems" ]; then
source /projects/sems/modulefiles/utils/sems-modules-init.sh
BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG=""
fi
if [ "$SPOT_CHECK" = "True" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
"gcc/6.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
"cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
else
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
fi
elif [ "$MACHINE" = "white" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.4.0,ibm/xl/16.1.0"
# Don't do pthread on white.
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
"cuda/9.0.103 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=Power8,Kepler37"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=1
elif [ "$MACHINE" = "bowman" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.4.258 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.2.174 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/18.0.128 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=KNL"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=1
elif [ "$MACHINE" = "sullivan" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=96
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/6.1.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS")
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=ARMv8-ThunderX"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=1
elif [ "$MACHINE" = "mayer" ]; then
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=96
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
ARM_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"arm/1.4.0 $ARM_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS")
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=ARMv8-TX2"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=1
elif [ "$MACHINE" = "shepard" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
BASE_MODULE_LIST_INTEL="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/17.4.196 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/18.0.128 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=HSW"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=1
elif [ "$MACHINE" = "apollo" ]; then
source /projects/sems/modulefiles/utils/sems-modules-init.sh
module use /home/projects/modulefiles/local/x86-64
module load kokkos-env
module load sems-git
module load sems-tex
module load sems-cmake/3.5.2
module load sems-gdb
SKIP_HWLOC=True
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.0.69"
NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
if [ "$SPOT_CHECK" = "True" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
"clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS"
"cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
else
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
)
fi
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=SNB,Volta70"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=1
else
echo "Unhandled machine $MACHINE" >&2
exit 1
fi
# Exported so that every child process started by this script inherits it
# (OpenMP runtimes conventionally read this variable for their thread count).
export OMP_NUM_THREADS=4
# Integer: how many existing result directories get_test_root_dir leaves in
# place before it starts deleting the oldest ones.
declare -i NUM_RESULTS_TO_KEEP=7
# Name prefix of the per-run result directories created under the current
# working directory.
RESULT_ROOT_PREFIX=TestAll
# Usage text: printed only when PRINT_HELP is "True", after which the script
# exits successfully. The list of supported compilers is taken from the first
# word of every COMPILERS entry selected for this machine.
if [ "$PRINT_HELP" = "True" ]; then
  cat <<'EOF'
test_all_sandia <ARGS> <OPTIONS>:
--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory
 Defaults to root repo containing this script
--debug: Run tests in debug. Defaults to False
--test-script: Test this script, not Kokkos
--skip-hwloc: Do not do hwloc tests
--num=N: Number of jobs to run in parallel
--spot-check: Minimal test set to issue pull request
--dry-run: Just print what would be executed
--build-only: Just do builds, don't run anything
--opt-flag=FLAG: Optimization flag (default: -O3)
--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS
--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS
--arch=ARCHITECTURE: overwrite architecture flags
--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS
--build-list=BUILD,BUILD,BUILD...
 Provide a comma-separated list of builds instead of running all builds
 Valid items:
 OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial
 Cuda_OpenMP, Cuda_Pthread, Cuda_Serial

ARGS: list of expressions matching compilers to test
 supported compilers sems
EOF
  for COMPILER_DATA in "${COMPILERS[@]}"; do
    ARR=($COMPILER_DATA)
    COMPILER=${ARR[0]}
    printf ' %s\n' "$COMPILER"
  done
  cat <<'EOF'

Examples:
 Run all tests
 % test_all_sandia

 Run all gcc tests
 % test_all_sandia gcc

 Run all gcc/4.8.4 and all intel tests
 % test_all_sandia gcc/4.8.4 intel

 Run all tests in debug
 % test_all_sandia --debug

 Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds
 % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial

If you want to kill the tests, do:
 hit ctrl-z
 % kill -9 %1

EOF
  exit 0
fi
# Pick the build flavour: "debug" only when DEBUG is exactly "True",
# "release" for every other value (including unset).
case "$DEBUG" in
  True) BUILD_TYPE=debug ;;
  *)    BUILD_TYPE=release ;;
esac
# If no args provided, do all compilers: '?' followed by the '*' appended
# below matches every non-empty compiler name.
if [ -z "$ARGS" ]; then
  ARGS='?'
fi

# Process args to figure out which compilers to test.
#
# Every whitespace-separated word of ARGS is treated as a glob prefix and
# compared against the first word (the compiler id) of each COMPILERS entry.
# Matching ids are appended once to the space-separated COMPILERS_TO_TEST.
COMPILERS_TO_TEST=""

# Split ARGS with `read` rather than an unquoted expansion: an unquoted
# `for ARG in $ARGS` would pathname-expand '?' (or any user pattern) against
# the files in the current directory before it is ever used as a pattern.
# `read -d ''` reports EOF as a non-zero status, hence the `|| true`.
read -r -d '' -a ARG_LIST <<< "$ARGS" || true

for ARG in "${ARG_LIST[@]}"; do
  for COMPILER_DATA in "${COMPILERS[@]}"; do
    ARR=($COMPILER_DATA)
    COMPILER=${ARR[0]}
    # $ARG is deliberately unquoted inside [[ ]] so it acts as a pattern.
    if [[ "$COMPILER" = $ARG* ]]; then
      if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then
        COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
      else
        echo "Tried to add $COMPILER twice"
      fi
    fi
  done
done
#
# Functions.
#
# get_compiler_name <COMPILER>
# Print the text before the first '/' of <COMPILER>
# (e.g. "gcc" for "gcc/4.8.4"). The argument is quoted so it is passed to
# cut verbatim instead of being word-split and glob-expanded by echo.
get_compiler_name() {
  printf '%s\n' "$1" | cut -d/ -f1
}
# get_compiler_version <COMPILER>
# Print the second '/'-separated field of <COMPILER>
# (e.g. "4.8.4" for "gcc/4.8.4"). The argument is quoted so it is passed to
# cut verbatim instead of being word-split and glob-expanded by echo.
get_compiler_version() {
  printf '%s\n' "$1" | cut -d/ -f2
}
# get_compiler_data <COMPILER> <ITEM>
# Do not call directly; use the get_compiler_* getters.
#
# Finds the COMPILERS entry whose first word equals <COMPILER> and prints its
# <ITEM>-th word (0-based), with commas turned into spaces and the
# <COMPILER_NAME> / <COMPILER_VERSION> placeholders replaced by the two parts
# of <COMPILER>. Returns 0 on success. When no entry matches, prints a
# diagnostic on stderr and exits with status 1 (inside a command substitution
# that only terminates the substitution's subshell).
get_compiler_data() {
  local compiler=$1
  local item=$2
  local compiler_name=$(get_compiler_name "$compiler")
  local compiler_vers=$(get_compiler_version "$compiler")
  local compiler_data

  for compiler_data in "${COMPILERS[@]}"; do
    # Unquoted on purpose: split the entry into its whitespace-separated words.
    local arr=($compiler_data)
    if [ "$compiler" = "${arr[0]}" ]; then
      echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g"
      return 0
    fi
  done

  # Not found.
  echo "Unrecognized compiler $compiler" >&2
  exit 1
}
#
# For all getters, usage: <GETTER> <COMPILER>
#
# Print word 1 (the module list) of the COMPILERS entry for compiler $1.
# "$1" is quoted so an empty argument stays in the compiler slot instead of
# vanishing and letting the item index slide into it.
get_compiler_modules() {
  get_compiler_data "$1" 1
}
# Print word 2 (the build list) of the COMPILERS entry for compiler $1.
# "$1" is quoted so an empty argument stays in the compiler slot instead of
# vanishing and letting the item index slide into it.
get_compiler_build_list() {
  get_compiler_data "$1" 2
}
# Print word 3 (the compiler executable) of the COMPILERS entry for compiler $1.
# "$1" is quoted so an empty argument stays in the compiler slot instead of
# vanishing and letting the item index slide into it.
get_compiler_exe_name() {
  get_compiler_data "$1" 3
}
# Print word 4 (the warning flags) of the COMPILERS entry for compiler $1.
# "$1" is quoted so an empty argument stays in the compiler slot instead of
# vanishing and letting the item index slide into it.
get_compiler_warning_flags() {
  get_compiler_data "$1" 4
}
# run_cmd <COMMAND...>
# Echo the command line, then evaluate it with stderr folded into stdout.
# When DRYRUN is "True" the command is only echoed and the function
# returns 0; otherwise the status of the evaluated command is returned.
run_cmd() {
  echo "RUNNING: $*"
  if [ "$DRYRUN" = "True" ]; then
    return 0
  fi
  eval "$* 2>&1"
}
# report_and_log_test_result <SUCCESS> <DESC> <COMMENT>
#
# Records the outcome of one job.
#   SUCCESS  "0" marks the job as passed; anything else marks it as failed.
#   DESC     job description, also used as the result file name.
#   COMMENT  free text for passes; for failures it must be the name of the
#            phase that failed, because the log "<DESC>.<COMMENT>.log" in the
#            current directory is dumped to stdout.
# Writes COMMENT into $PASSED_DIR/<DESC> or $FAILED_DIR/<DESC>. All paths and
# values are quoted so result directories containing whitespace still work.
report_and_log_test_result() {
  # Use sane var names.
  local success=$1; local desc=$2; local comment=$3;

  if [ "$success" = "0" ]; then
    echo " PASSED $desc"
    printf '%s\n' "$comment" > "$PASSED_DIR/$desc"
  else
    # For failures, comment should be the name of the phase that failed.
    echo " FAILED $desc" >&2
    printf '%s\n' "$comment" > "$FAILED_DIR/$desc"
    cat "${desc}.${comment}.log"
  fi
}
# setup_env <COMPILER>
#
# Purges the module environment and loads, in order, every module listed for
# <COMPILER> (as printed by get_compiler_modules). Returns 1 as soon as a
# module does not show up in `module list` afterwards, 0 when all were loaded.
setup_env() {
  local compiler=$1
  local compiler_modules=$(get_compiler_modules "$compiler")

  module purge

  local mod
  for mod in $compiler_modules; do
    echo "Loading module $mod"
    module load "$mod" 2>&1
    # It is ridiculously hard to check for the success of a loaded
    # module. Module does not return error codes and piping to grep
    # causes module to run in a subshell.
    # -F: compare the module name literally; as a regex the dots in a name
    # such as gcc/5.3.0 would also accept a different loaded module.
    module list 2>&1 | grep -qF -- "$mod" || return 1
  done

  return 0
}
# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
#
# Runs one configure/build/test job in $ROOT_DIR/<COMPILER>/<BUILD>-<BUILD_TYPE>
# and records the outcome through report_and_log_test_result.
#   COMPILER    compiler id as listed in COMPILERS (e.g. gcc/4.8.4)
#   BUILD       device list handed to generate_makefile.bash --with-devices
#   BUILD_TYPE  "release"/"debug", optionally prefixed with "hwloc-"
# Always returns 0; failures are reported through the result directories.
# Each phase stops the job when it fails: the failure handlers use
# `report ...; return 0` (not `&&`) so that a non-zero status from the
# reporting step can never let the job fall through into the next phase.
single_build_and_test() {
  # Use sane var names.
  local compiler=$1; local build=$2; local build_type=$3;
  # Start empty so a stray extra_args from the caller's scope cannot leak in.
  local extra_args=""

  # Set up env.
  mkdir -p "$ROOT_DIR/$compiler/${build}-$build_type"
  cd "$ROOT_DIR/$compiler/${build}-$build_type"

  local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
  setup_env $compiler >& "${desc}.configure.log" || { report_and_log_test_result 1 "${desc}" configure; return 0; }

  # Set up flags.
  local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
  local compiler_exe=$(get_compiler_exe_name $compiler)

  if [[ "$build_type" = hwloc* ]]; then
    # hwloc prefix = two directory levels above the hwloc-info executable.
    extra_args="--with-hwloc=$(dirname "$(dirname "$(which hwloc-info)")")"
  fi

  # Note: this sets the global default for all later jobs as well.
  if [[ "$OPT_FLAG" = "" ]]; then
    OPT_FLAG="-O3"
  fi

  local cxxflags
  local ldflags
  if [[ "$build_type" = *debug* ]]; then
    extra_args="$extra_args --debug"
    cxxflags="-g $compiler_warning_flags"
    ldflags="-g"
  else
    cxxflags="$OPT_FLAG $compiler_warning_flags"
    ldflags="${OPT_FLAG}"
  fi

  cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}"
  ldflags="${ldflags} ${LD_FLAGS_EXTRA}"

  if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
    extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
  fi
  if [[ "$KOKKOS_OPTIONS" != "" ]]; then
    extra_args="$extra_args $KOKKOS_OPTIONS"
  else
    extra_args="$extra_args --with-options=enable_large_mem_tests"
  fi

  echo " Starting job $desc"

  local comment="no_comment"

  if [ "$TEST_SCRIPT" = "True" ]; then
    # Script self-test: sleep 1..10 seconds and fail roughly half of the jobs.
    local rand=$(( 1 + RANDOM % 10 ))
    sleep $rand

    if [ $rand -gt 5 ]; then
      run_cmd ls fake_problem >& "${desc}.configure.log" || { report_and_log_test_result 1 "$desc" configure; return 0; }
    fi
  else
    # The escaped quotes survive until run_cmd's eval, keeping each flag set a single argument.
    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> "${desc}.configure.log" || { report_and_log_test_result 1 "${desc}" configure; return 0; }
    local -i build_start_time=$(date +%s)
    run_cmd make -j 48 build-test >& "${desc}.build.log" || { report_and_log_test_result 1 "${desc}" build; return 0; }
    local -i build_end_time=$(date +%s)
    comment="build_time=$((build_end_time - build_start_time))"

    if [[ "$BUILD_ONLY" == False ]]; then
      run_cmd make test >& "${desc}.test.log" || { report_and_log_test_result 1 "${desc}" test; return 0; }
      local -i run_end_time=$(date +%s)
      comment="$comment run_time=$((run_end_time - build_end_time))"
    fi
  fi

  report_and_log_test_result 0 "$desc" "$comment"

  return 0
}
# wait_for_jobs <NUM-JOBS>
# Block, polling once per second, until the number of lines printed by `jobs`
# is below <NUM-JOBS>. Returns immediately if it already is.
wait_for_jobs() {
local -i max_jobs=$1
# One line of `jobs` output is counted as one active job.
local -i num_active_jobs=$(jobs | wc -l)
while [ $num_active_jobs -ge $max_jobs ]
do
sleep 1
num_active_jobs=$(jobs | wc -l)
# NOTE(review): presumably this bare `jobs` call (output discarded) is here so
# the shell drops finished jobs from its table before the next count - confirm.
jobs >& /dev/null
done
}
# run_in_background <COMPILER> <BUILD> <BUILD_TYPE>
#
# Waits until fewer than the allowed number of jobs are active, then starts
# single_build_and_test with the same arguments as a background job.
# The limit is NUM_JOBS_TO_RUN_IN_PARALLEL (command-line input is not
# overridden for build-only runs), reduced to 1 for cuda compilers.
run_in_background() {
  local compiler=$1

  local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
  if [[ "$compiler" == cuda* ]]; then
    num_jobs=1
  fi
  # NOTE(review): compiler ids have the form name/version, so this exact
  # comparison with "clang" never matches; "clang*" may have been intended
  # (compare the cuda test above). Behaviour kept as is - confirm intent.
  if [[ "$compiler" == clang ]]; then
    num_jobs=1
  fi

  wait_for_jobs $num_jobs

  # "$@" forwards every argument unchanged (no re-splitting or globbing).
  single_build_and_test "$@" &
}
# build_and_test_all <COMPILER>
#
# Queues one background job per build configuration of <COMPILER>. The
# configurations come from CUSTOM_BUILD_LIST (comma separated) when it is set,
# otherwise from the compiler's own build list. Unless the compiler id starts
# with "cuda" or SKIP_HWLOC is not "False", every configuration is queued a
# second time as a "hwloc-" variant. Always returns 0.
build_and_test_all() {
  local compiler=$1

  # Get compiler data.
  if [ -n "$CUSTOM_BUILD_LIST" ]; then
    local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
  else
    local compiler_build_list=$(get_compiler_build_list $compiler)
  fi

  # Do builds.
  local build
  for build in $compiler_build_list; do
    run_in_background $compiler $build $BUILD_TYPE

    # No hwloc variant for cuda compilers or when hwloc tests are skipped.
    if [[ "$compiler" == cuda* || "$SKIP_HWLOC" != False ]]; then
      continue
    fi
    run_in_background $compiler $build "hwloc-$BUILD_TYPE"
  done

  return 0
}
# get_test_root_dir
#
# Prunes old result directories and prints the absolute path to use for this
# run: <cwd>/<RESULT_ROOT_PREFIX>_<YYYY-mm-dd_HH.MM.SS> (the directory itself
# is not created here).
# Entries in the current directory whose name starts with RESULT_ROOT_PREFIX
# are sorted by name; all but the last NUM_RESULTS_TO_KEEP are removed.
# The entries are held in an array, one per line of `find` output, so that an
# empty listing counts as zero entries and names containing blanks are not
# split into separate paths.
get_test_root_dir() {
  local -a existing_results=()
  local result
  while IFS= read -r result; do
    existing_results+=("$result")
  done < <(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort)

  local -i num_to_delete=$(( ${#existing_results[@]} - NUM_RESULTS_TO_KEEP ))

  if [ "$num_to_delete" -gt 0 ]; then
    # Oldest first: the timestamp suffix makes name order equal age order.
    /bin/rm -rf -- "${existing_results[@]:0:num_to_delete}"
  fi

  echo "$(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S")"
}
# wait_summarize_and_exit
#
# Waits for all background jobs, prints one line per passed job (name plus
# the stored comment) and one line per failed job (name plus failed phase),
# then exits the script. The exit status is the number of failed jobs,
# capped at 255: exit statuses are truncated to 8 bits, so without the cap
# 256 failures would be reported to the caller as success.
wait_summarize_and_exit() {
  wait_for_jobs 1

  echo "#######################################################"
  echo "PASSED TESTS"
  echo "#######################################################"

  local passed_test
  for passed_test in $(\ls -1 "$PASSED_DIR" | sort)
  do
    echo $passed_test $(cat "$PASSED_DIR/$passed_test")
  done

  local -i rv=0
  if [ "$(ls -A "$FAILED_DIR")" ]; then
    echo "#######################################################"
    echo "FAILED TESTS"
    echo "#######################################################"

    local failed_test
    for failed_test in $(\ls -1 "$FAILED_DIR" | sort)
    do
      echo $failed_test "("$(cat "$FAILED_DIR/$failed_test")" failed)"
      rv=$((rv + 1))
    done
  fi

  if [ "$rv" -gt 255 ]; then
    rv=255
  fi
  exit $rv
}
#
# Main.
#

# Create the result tree for this run and make it the working directory.
# Stop right away if that fails: every job writes its logs relative to
# ROOT_DIR, so continuing would scatter them over the wrong directory.
ROOT_DIR=$(get_test_root_dir)
mkdir -p "$ROOT_DIR"
cd "$ROOT_DIR" || { echo "Cannot enter result directory $ROOT_DIR" >&2; exit 1; }

# One marker file per job is written into these directories.
PASSED_DIR=$ROOT_DIR/results/passed
FAILED_DIR=$ROOT_DIR/results/failed
mkdir -p "$PASSED_DIR"
mkdir -p "$FAILED_DIR"

echo "Going to test compilers: " $COMPILERS_TO_TEST
# COMPILERS_TO_TEST is a space-separated list, hence the unquoted expansion.
for COMPILER in $COMPILERS_TO_TEST; do
  echo "Testing compiler $COMPILER"
  build_and_test_all $COMPILER
done

# Waits for the remaining jobs, prints the summary and exits the script.
wait_summarize_and_exit

View File

@ -1,4 +0,0 @@
packages:
kokkos:
variants: +cuda +openmp +volta70 +cuda_lambda +wrapper ^cuda@10.1
compiler: [gcc@7.2.0]

View File

@ -28,24 +28,6 @@
namespace Kokkos {
namespace Impl {
//! Either append to the label if the property already exists, or set it.
template <typename... P>
auto with_updated_label(const ViewCtorProp<P...>& view_ctor_prop,
const std::string& label) {
using vcp_t = ViewCtorProp<P...>;
//! If the label property is already set, append. Otherwise, set label.
if constexpr (vcp_t::has_label) {
vcp_t new_ctor_props(view_ctor_prop);
static_cast<ViewCtorProp<void, std::string>&>(new_ctor_props)
.value.append(label);
return new_ctor_props;
} else {
return Impl::with_properties_if_unset(view_ctor_prop, label);
}
}
} // namespace Impl
template <typename Device = Kokkos::DefaultExecutionSpace>
class Bitset;
@ -92,9 +74,10 @@ class Bitset {
using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
public:
/// constructor
Bitset() = default;
/// arg_size := number of bit in set
Bitset(unsigned arg_size = 0u) : Bitset(Kokkos::view_alloc(), arg_size) {}
Bitset(unsigned arg_size) : Bitset(Kokkos::view_alloc(), arg_size) {}
template <class... P>
Bitset(const Impl::ViewCtorProp<P...>& arg_prop, unsigned arg_size)
@ -108,9 +91,8 @@ class Bitset {
"Allocation properties should not contain the 'pointer' property.");
//! Update 'label' property and allocate.
const auto prop_copy = Kokkos::Impl::with_updated_label(
Impl::with_properties_if_unset(arg_prop, std::string("Bitset")),
" - blocks");
const auto prop_copy =
Impl::with_properties_if_unset(arg_prop, std::string("Bitset"));
m_blocks =
block_view_type(prop_copy, ((m_size + block_mask) >> block_shift));
@ -310,8 +292,8 @@ class Bitset {
}
private:
unsigned m_size;
unsigned m_last_block_mask;
unsigned m_size = 0;
unsigned m_last_block_mask = 0;
block_view_type m_blocks;
private:

View File

@ -292,15 +292,6 @@ class DualView : public ViewTraits<DataType, Properties...> {
d_view(src.d_view),
h_view(src.h_view) {}
//! Copy assignment operator (shallow copy assignment)
template <typename DT, typename... DP>
DualView& operator=(const DualView<DT, DP...>& src) {
modified_flags = src.modified_flags;
d_view = src.d_view;
h_view = src.h_view;
return *this;
}
//! Subview constructor
template <class DT, class... DP, class Arg0, class... Args>
DualView(const DualView<DT, DP...>& src, const Arg0& arg0, Args... args)

View File

@ -1340,7 +1340,7 @@ class ViewMapping<
template <class MemoryTraits>
struct apply {
static_assert(Kokkos::is_memory_traits<MemoryTraits>::value, "");
static_assert(Kokkos::is_memory_traits<MemoryTraits>::value);
using traits_type =
Kokkos::ViewTraits<data_type, array_layout,
@ -1653,8 +1653,17 @@ KOKKOS_FUNCTION auto as_view_of_rank_n(
Kokkos::abort("Converting DynRankView to a View of mis-matched rank!");)
}
return View<typename RankDataType<T, N>::type, Args...>(
v.data(), v.impl_map().layout());
auto layout = v.impl_map().layout();
if constexpr (std::is_same_v<decltype(layout), Kokkos::LayoutLeft> ||
std::is_same_v<decltype(layout), Kokkos::LayoutRight> ||
std::is_same_v<decltype(layout), Kokkos::LayoutStride> ||
is_layouttiled<decltype(layout)>::value) {
for (int i = N; i < 7; ++i)
layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG;
}
return View<typename RankDataType<T, N>::type, Args...>(v.data(), layout);
}
template <typename Function, typename... Args>

View File

@ -124,15 +124,8 @@ KOKKOS_INLINE_FUNCTION void offsetview_verify_operator_bounds(
args...);
Kokkos::Impl::throw_runtime_exception(std::string(buffer));))
KOKKOS_IF_ON_DEVICE((
/* Check #1: is there a SharedAllocationRecord?
(we won't use it, but if it is not there then there isn't
a corresponding SharedAllocationHeader containing a label).
This check should cover the case of Views that don't
have the Unmanaged trait but were initialized by pointer. */
if (tracker.has_record()) {
Kokkos::Impl::operator_bounds_error_on_device(map);
} else { Kokkos::abort("OffsetView bounds error"); }))
KOKKOS_IF_ON_DEVICE(
(Kokkos::abort("OffsetView bounds error"); (void)tracker;))
}
}

Some files were not shown because too many files have changed in this diff Show More