Merge branch 'develop' into general-triclinic
This commit is contained in:
@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS)
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||
include(ExternalProject)
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "16b9b09ae947d434dfb58fc5c87c2b76" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "889dcea2b5ced3debdc5b0820044bdc4" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
mark_as_advanced(KOKKOS_URL)
|
||||
mark_as_advanced(KOKKOS_MD5)
|
||||
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
|
||||
@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS)
|
||||
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
||||
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
||||
elseif(EXTERNAL_KOKKOS)
|
||||
find_package(Kokkos 4.2.01 REQUIRED CONFIG)
|
||||
find_package(Kokkos 4.3.00 REQUIRED CONFIG)
|
||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||
else()
|
||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||
|
||||
@ -533,9 +533,6 @@ They must be specified in uppercase.
|
||||
* - A64FX
|
||||
- HOST
|
||||
- ARMv8.2 with SVE Support
|
||||
* - WSM
|
||||
- HOST
|
||||
- Intel Westmere CPU (SSE 4.2)
|
||||
* - SNB
|
||||
- HOST
|
||||
- Intel Sandy/Ivy Bridge CPU (AVX 1)
|
||||
@ -566,18 +563,15 @@ They must be specified in uppercase.
|
||||
* - KNL
|
||||
- HOST
|
||||
- Intel Knights Landing Xeon Phi
|
||||
* - BGQ
|
||||
- HOST
|
||||
- IBM Blue Gene/Q CPU
|
||||
* - POWER7
|
||||
- HOST
|
||||
- IBM POWER7 CPU
|
||||
* - POWER8
|
||||
- HOST
|
||||
- IBM POWER8 CPU
|
||||
* - POWER9
|
||||
- HOST
|
||||
- IBM POWER9 CPU
|
||||
* - RISCV_SG2042
|
||||
- HOST
|
||||
- SG2042 (RISC-V) CPU
|
||||
* - KEPLER30
|
||||
- GPU
|
||||
- NVIDIA Kepler generation CC 3.0 GPU
|
||||
@ -666,7 +660,7 @@ They must be specified in uppercase.
|
||||
- GPU
|
||||
- Intel GPU Ponte Vecchio
|
||||
|
||||
This list was last updated for version 4.2 of the Kokkos library.
|
||||
This list was last updated for version 4.3.0 of the Kokkos library.
|
||||
|
||||
.. tabs::
|
||||
|
||||
|
||||
@ -1,5 +1,105 @@
|
||||
# CHANGELOG
|
||||
|
||||
## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00)
|
||||
|
||||
### Features:
|
||||
* Add `Experimental::sort_by_key(exec, keys, values)` algorithm [\#6801](https://github.com/kokkos/kokkos/pull/6801)
|
||||
|
||||
### Backend and Architecture Enhancements:
|
||||
|
||||
#### CUDA:
|
||||
* Experimental multi-GPU support (from the same process) [\#6782](https://github.com/kokkos/kokkos/pull/6782)
|
||||
* Link against CUDA libraries even with KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE [\#6701](https://github.com/kokkos/kokkos/pull/6701)
|
||||
* Don't use the compiler launcher script if the CMake compile language is CUDA. [\#6704](https://github.com/kokkos/kokkos/pull/6704)
|
||||
* nvcc(wrapper): adding "long" and "short" versions for all flags [\#6615](https://github.com/kokkos/kokkos/pull/6615)
|
||||
|
||||
#### HIP:
|
||||
* Fix compilation when using amdclang (with ROCm >= 5.7) and RDC [\#6857](https://github.com/kokkos/kokkos/pull/6857)
|
||||
* Use rocthrust for sorting, when available [\#6793](https://github.com/kokkos/kokkos/pull/6793)
|
||||
|
||||
#### SYCL:
|
||||
* We only support OneAPI SYCL implementation: add check during initialization
|
||||
* Error out on initialization if the backend is different from `ext_oneapi_*` [\#6784](https://github.com/kokkos/kokkos/pull/6784)
|
||||
* Filter GPU devices for `ext_oneapi_*` GPU devices [\#6758](https://github.com/kokkos/kokkos/pull/6758)
|
||||
* Performance Improvements
|
||||
* Avoid unnecessary zero-memset of the scratch flags in SYCL [\#6739](https://github.com/kokkos/kokkos/pull/6739)
|
||||
* Use host-pinned memory to copy reduction/scan result [\#6500](https://github.com/kokkos/kokkos/pull/6500)
|
||||
* Address deprecations after oneAPI 2023.2.0 [\#6577](https://github.com/kokkos/kokkos/pull/6577)
|
||||
* Make sure to call find_dependency for oneDPL if necessary [\#6870](https://github.com/kokkos/kokkos/pull/6870)
|
||||
|
||||
#### OpenMPTarget:
|
||||
* Use LLVM extensions for dynamic shared memory [\#6380](https://github.com/kokkos/kokkos/pull/6380)
|
||||
* Guard scratch memory usage in ParallelReduce [\#6585](https://github.com/kokkos/kokkos/pull/6585)
|
||||
* Update linker flags for Intel GPUs [\#6735](https://github.com/kokkos/kokkos/pull/6735)
|
||||
* Improve handling of printf on Intel GPUs [\#6652](https://github.com/kokkos/kokkos/pull/6652)
|
||||
|
||||
#### OpenACC:
|
||||
* Add atomics support [\#6446](https://github.com/kokkos/kokkos/pull/6446)
|
||||
* Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772)
|
||||
|
||||
#### Threads:
|
||||
* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601)
|
||||
|
||||
#### OpenMP:
|
||||
* Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573)
|
||||
|
||||
### General Enhancements
|
||||
|
||||
* Improve performance of random number generation when using a normal distribution on GPUs [\#6556](https://github.com/kokkos/kokkos/pull/6556)
|
||||
* Allocate temporary view with the user-provided execution space instance and do not initialize in `unique` algorithm [\#6598](https://github.com/kokkos/kokkos/pull/6598)
|
||||
* Add deduction guide for `Kokkos::Array` [\#6373](https://github.com/kokkos/kokkos/pull/6373)
|
||||
* Provide new public headers `<Kokkos_Clamp.hpp>` and `<Kokkos_MinMax.hpp>` [\#6687](https://github.com/kokkos/kokkos/pull/6687)
|
||||
* Fix/improvement to `remove_if` parallel algorithm: use the provided execution space instance for temporary allocations and drop unnecessary initialization + avoid evaluating the predicate twice during final pass [\#6747](https://github.com/kokkos/kokkos/pull/6747)
|
||||
* Add runtime function to query the number of devices and make device ID consistent with `KOKKOS_VISIBLE_DEVICES` [\#6713](https://github.com/kokkos/kokkos/pull/6713)
|
||||
* simd: support `vector_aligned_tag` [\#6243](https://github.com/kokkos/kokkos/pull/6243)
|
||||
* Avoid unnecessary allocation when default constructing Bitset [\#6524](https://github.com/kokkos/kokkos/pull/6524)
|
||||
* Fix constness for views in std algorithms [\#6813](https://github.com/kokkos/kokkos/pull/6813)
|
||||
* Improve error message on unsafe implicit conversion in MDRangePolicy [\#6855](https://github.com/kokkos/kokkos/pull/6855)
|
||||
* CTAD (deduction guides) for RangePolicy [\#6850](https://github.com/kokkos/kokkos/pull/6850)
|
||||
* CTAD (deduction guides) for MDRangePolicy [\#5516](https://github.com/kokkos/kokkos/pull/5516)
|
||||
|
||||
### Build System Changes
|
||||
* Require `Kokkos_ENABLE_ATOMICS_BYPASS` option to bypass atomic operation for Serial backend only builds [\#6692](https://github.com/kokkos/kokkos/pull/6692)
|
||||
* Add support for RISCV and the Milk-V's Pioneer [\#6773](https://github.com/kokkos/kokkos/pull/6773)
|
||||
* Add C++26 standard to CMake setup [\#6733](https://github.com/kokkos/kokkos/pull/6733)
|
||||
* Fix Makefile when using gnu_generate_makefile.sh and make >= 4.3 [\#6606](https://github.com/kokkos/kokkos/pull/6606)
|
||||
* Cuda: Fix configuring with CMake >= 3.28.4 - temporary fallback to internal CudaToolkit.cmake [\#6898](https://github.com/kokkos/kokkos/pull/6898)
|
||||
|
||||
### Incompatibilities (i.e. breaking changes)
|
||||
* Remove the `DEPRECATED_CODE_3` option and all code that was guarded by it [\#6523](https://github.com/kokkos/kokkos/pull/6523)
|
||||
* Drop guards to accommodate external code defining `KOKKOS_ASSERT` [\#6665](https://github.com/kokkos/kokkos/pull/6665)
|
||||
* `Profiling::ProfilingSection(std::string)` constructor marked explicit and nodiscard [\#6690](https://github.com/kokkos/kokkos/pull/6690)
|
||||
* Add bound check preconditions for `RangePolicy` and `MDRangePolicy` [\#6617](https://github.com/kokkos/kokkos/pull/6617) [\#6726](https://github.com/kokkos/kokkos/pull/6726)
|
||||
* Add checks for unsafe implicit conversions in RangePolicy [\#6754](https://github.com/kokkos/kokkos/pull/6754)
|
||||
* Remove Kokkos::[b]half_t volatile overloads [\#6579](https://github.com/kokkos/kokkos/pull/6579)
|
||||
* Remove KOKKOS_IMPL_DO_NOT_USE_PRINTF [\#6593](https://github.com/kokkos/kokkos/pull/6593)
|
||||
* Check matching static extents in View constructor [\#5190](https://github.com/kokkos/kokkos/pull/5190)
|
||||
* Tools(profiling): fix typo Kokkos_Tools_Optim[i]zationGoal [\#6642](https://github.com/kokkos/kokkos/pull/6642)
|
||||
* Remove variadic range policy constructor (disallow passing multiple trailing chunk size arguments) [\#6845](https://github.com/kokkos/kokkos/pull/6845)
|
||||
* Improve message on view out of bounds access and always abort [\#6861](https://github.com/kokkos/kokkos/pull/6861)
|
||||
* Drop `KOKKOS_ENABLE_INTEL_MM_ALLOC` macro [\#6797](https://github.com/kokkos/kokkos/pull/6797)
|
||||
* Remove `Kokkos::Experimental::LogicalMemorySpace` (without going through deprecation) [\#6557](https://github.com/kokkos/kokkos/pull/6557)
|
||||
* Remove `Experimental::HBWSpace` and support for linking against memkind [\#6791](https://github.com/kokkos/kokkos/pull/6791)
|
||||
* Drop librt TPL and associated `KOKKOS_ENABLE_LIBRT` macro [\#6798](https://github.com/kokkos/kokkos/pull/6798)
|
||||
* Drop support for old CPU architectures (`ARCH_BGQ`, `ARCH_POWER7`, `ARCH_WSM` and associated `ARCH_SSE4` macro) [\#6806](https://github.com/kokkos/kokkos/pull/6806)
|
||||
* Drop support for deprecated command-line arguments and environment variables [\#6744](https://github.com/kokkos/kokkos/pull/6744)
|
||||
|
||||
### Deprecations
|
||||
* Provide kokkos_swap as part of Core and deprecate Experimental::swap in Algorithms [\#6697](https://github.com/kokkos/kokkos/pull/6697)
|
||||
* Deprecate {Cuda,HIP}::detect_device_count() and Cuda::[detect_]device_arch() [\#6710](https://github.com/kokkos/kokkos/pull/6710)
|
||||
* Deprecate `ExecutionSpace::in_parallel()` [\#6582](https://github.com/kokkos/kokkos/pull/6582)
|
||||
|
||||
### Bug Fixes
|
||||
* Fix team-level MDRange reductions: [\#6511](https://github.com/kokkos/kokkos/pull/6511)
|
||||
* Fix CUDA and SYCL small value type (16-bit) team reductions [\#5334](https://github.com/kokkos/kokkos/pull/5334)
|
||||
* Enable `{transform_}exclusive_scan` in place [\#6667](https://github.com/kokkos/kokkos/pull/6667)
|
||||
* `fill_random` overloads that do not take an execution space instance argument should fence [\#6658](https://github.com/kokkos/kokkos/pull/6658)
|
||||
* HIP,Cuda,OpenMPTarget: Fixup use provided execution space when copying host inaccessible reduction result [\#6777](https://github.com/kokkos/kokkos/pull/6777)
|
||||
* Fix typo in `cuda_func_set_attribute[s]_wrapper` preventing proper setting of desired occupancy [\#6786](https://github.com/kokkos/kokkos/pull/6786)
|
||||
* Avoid undefined behavior due to conversion between signed and unsigned integers in shift_{right, left}_team_impl [\#6821](https://github.com/kokkos/kokkos/pull/6821)
|
||||
* Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892)
|
||||
|
||||
## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01)
|
||||
|
||||
@ -999,95 +1099,95 @@
|
||||
- Major update for OpenMPTarget: many capabilities now work. For details contact us.
|
||||
- Added DPC++/SYCL backend: primary capabilities are working.
|
||||
- Added Kokkos Graph API analogous to CUDA Graphs.
|
||||
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/#3536)
|
||||
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/#3546)
|
||||
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/#3439)
|
||||
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/#3379)
|
||||
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/3536)
|
||||
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/3546)
|
||||
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/3439)
|
||||
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/3379)
|
||||
|
||||
**Implemented enhancements Backends and Archs:**
|
||||
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/#3614)
|
||||
- Adding support for AMD gfx908 archictecture [\#3375](https://github.com/kokkos/kokkos/pull/#3375)
|
||||
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/#3583)
|
||||
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/#3577)
|
||||
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/#3544)
|
||||
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/#3550)
|
||||
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/#3480)
|
||||
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/#3474)
|
||||
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/#3451)
|
||||
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/#3447)
|
||||
- OpenMPTarget: Hierarchial reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/#3504)
|
||||
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/#3411)
|
||||
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/#3440)
|
||||
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/#3418)
|
||||
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/#3366)
|
||||
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/3614)
|
||||
- Adding support for AMD gfx908 architecture [\#3375](https://github.com/kokkos/kokkos/pull/3375)
|
||||
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/3583)
|
||||
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/3577)
|
||||
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/3544)
|
||||
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/3550)
|
||||
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/3480)
|
||||
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/3474)
|
||||
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/3451)
|
||||
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/3447)
|
||||
- OpenMPTarget: Hierarchical reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/3504)
|
||||
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/3411)
|
||||
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/3440)
|
||||
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/3418)
|
||||
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/3366)
|
||||
|
||||
**Implemented enhancements Policies:**
|
||||
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/#3494)
|
||||
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/#3527)
|
||||
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/#3395)
|
||||
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/#3362)
|
||||
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/#3369)
|
||||
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/#3206)
|
||||
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/#3509)
|
||||
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/3494)
|
||||
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/3527)
|
||||
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/3395)
|
||||
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/3362)
|
||||
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/3369)
|
||||
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/3206)
|
||||
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/3509)
|
||||
|
||||
**Implemented enhancements BuildSystem:**
|
||||
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/#3488)
|
||||
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/#3548)
|
||||
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/#3136)
|
||||
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/#3434)
|
||||
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/#3402)
|
||||
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/#3457)
|
||||
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/3488)
|
||||
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/3548)
|
||||
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/3136)
|
||||
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/3434)
|
||||
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/3402)
|
||||
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/3457)
|
||||
|
||||
**Implemented enhancements Tools:**
|
||||
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/#3455)
|
||||
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/#3530)
|
||||
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/#3518)
|
||||
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/#3459)
|
||||
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/#3326)
|
||||
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/3455)
|
||||
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/3530)
|
||||
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/3518)
|
||||
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/3459)
|
||||
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/3326)
|
||||
|
||||
**Implemented enhancements Other:**
|
||||
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/#3528)
|
||||
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/#3449)
|
||||
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/#3436)
|
||||
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/#3435)
|
||||
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/#3422)
|
||||
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/#3416)
|
||||
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/#3388)
|
||||
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/#3359)
|
||||
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/#3357)
|
||||
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/#3340)
|
||||
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/#3339)
|
||||
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/#3338)
|
||||
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/#3309)
|
||||
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/#3265)
|
||||
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/#2941)
|
||||
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/3528)
|
||||
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/3449)
|
||||
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/3436)
|
||||
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/3435)
|
||||
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/3422)
|
||||
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/3416)
|
||||
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/3388)
|
||||
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/3359)
|
||||
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/3357)
|
||||
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/3340)
|
||||
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/3339)
|
||||
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/3338)
|
||||
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/3309)
|
||||
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/3265)
|
||||
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/2941)
|
||||
|
||||
**Fixed bugs:**
|
||||
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/#3591)
|
||||
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/#3588)
|
||||
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/#3566)
|
||||
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/#3565)
|
||||
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/#3532)
|
||||
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/#3529)
|
||||
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/#3510)
|
||||
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/#3503)
|
||||
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/#3467)
|
||||
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/#3458)
|
||||
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/#3398)
|
||||
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/#3393)
|
||||
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/#3390)
|
||||
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/#3378)
|
||||
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/#3348)
|
||||
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/#3345)
|
||||
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/#3343)
|
||||
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/#3260)
|
||||
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/3591)
|
||||
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/3588)
|
||||
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/3566)
|
||||
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/3565)
|
||||
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/3532)
|
||||
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/3529)
|
||||
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/3510)
|
||||
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/3503)
|
||||
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/3467)
|
||||
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/3458)
|
||||
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/3398)
|
||||
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/3393)
|
||||
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/3390)
|
||||
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/3378)
|
||||
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/3348)
|
||||
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/3345)
|
||||
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/3343)
|
||||
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/3260)
|
||||
|
||||
**Incompatibilities:**
|
||||
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/#3535)
|
||||
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/#3534)
|
||||
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/#3301)
|
||||
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/#3264)
|
||||
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/#3148)
|
||||
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/3535)
|
||||
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/3534)
|
||||
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/3301)
|
||||
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/3264)
|
||||
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/3148)
|
||||
|
||||
## [3.2.01](https://github.com/kokkos/kokkos/tree/3.2.01) (2020-11-17)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.2.00...3.2.01)
|
||||
|
||||
@ -150,8 +150,8 @@ ENDIF()
|
||||
|
||||
|
||||
set(Kokkos_VERSION_MAJOR 4)
|
||||
set(Kokkos_VERSION_MINOR 2)
|
||||
set(Kokkos_VERSION_PATCH 1)
|
||||
set(Kokkos_VERSION_MINOR 3)
|
||||
set(Kokkos_VERSION_PATCH 0)
|
||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
|
||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||
|
||||
@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS)
|
||||
endif
|
||||
|
||||
KOKKOS_VERSION_MAJOR = 4
|
||||
KOKKOS_VERSION_MINOR = 2
|
||||
KOKKOS_VERSION_PATCH = 1
|
||||
KOKKOS_VERSION_MINOR = 3
|
||||
KOKKOS_VERSION_PATCH = 0
|
||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||
|
||||
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
||||
@ -22,14 +22,14 @@ KOKKOS_DEVICES ?= "OpenMP"
|
||||
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
|
||||
# IBM: BGQ,Power7,Power8,Power9
|
||||
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
|
||||
# IBM: Power8,Power9
|
||||
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
|
||||
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
|
||||
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
|
||||
KOKKOS_ARCH ?= ""
|
||||
# Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
# Options: hwloc,librt,experimental_memkind
|
||||
# Options: hwloc
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
# Options: c++17,c++1z,c++20,c++2a,c++23,c++2b
|
||||
KOKKOS_CXX_STANDARD ?= "c++17"
|
||||
@ -56,7 +56,7 @@ uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(
|
||||
uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT)
|
||||
# Return a 1 if a string contains a substring and 0 if not
|
||||
# Note the search string should be without '"'
|
||||
# Example: $(call kokkos_has_string,"hwloc,librt",hwloc)
|
||||
# Example: $(call kokkos_has_string,"hwloc,libdl",hwloc)
|
||||
# Will return a 1
|
||||
kokkos_has_string=$(if $(findstring $(call uppercase,$2),$(call uppercase,$1)),1,0)
|
||||
# Returns 1 if the path exists, 0 otherwise
|
||||
@ -73,11 +73,11 @@ KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
|
||||
KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX23 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++23)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX2B := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2b)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX26 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++26)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX2C := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2c)
|
||||
|
||||
# Check for external libraries.
|
||||
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
|
||||
KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt)
|
||||
KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind)
|
||||
|
||||
# Check for advanced settings.
|
||||
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
|
||||
@ -318,7 +318,6 @@ endif
|
||||
|
||||
# Intel based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC)
|
||||
KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
|
||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
|
||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
|
||||
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
|
||||
@ -398,11 +397,9 @@ KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
|
||||
|
||||
# IBM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9)
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
|
||||
# AMD based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
|
||||
@ -413,22 +410,37 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
|
||||
endif
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
|
||||
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
|
||||
endif
|
||||
|
||||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
|
||||
|
||||
# Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
|
||||
# Flags that evaluate the relational expression "<sum of selected archs> > 1"
# with bc.  The pipe into bc must be part of the $(shell ...) command line:
# when it is written after the closing parenthesis of $(shell ...), make only
# appends the literal text "| bc" to the echoed expression, so the variable
# can never compare equal to 1 and the checks that follow are dead.
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||
@ -573,6 +585,16 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2B), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2B_FLAG)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX26), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX26_FLAG)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2C), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2C_FLAG)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
@ -612,27 +634,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
|
||||
KOKKOS_LIBS += -lrt
|
||||
KOKKOS_TPL_LIBRARY_NAMES += rt
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
ifneq ($(KOKKOS_CMAKE), yes)
|
||||
ifneq ($(MEMKIND_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LIBDIRS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib
|
||||
endif
|
||||
KOKKOS_LIBS += -lmemkind -lnuma
|
||||
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
|
||||
endif
|
||||
@ -699,10 +700,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC")
|
||||
else
|
||||
@ -827,20 +824,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xSSE4.2
|
||||
KOKKOS_LDFLAGS += -xSSE4.2
|
||||
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
else
|
||||
# Assume that this is a really a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -msse4.2
|
||||
KOKKOS_LDFLAGS += -msse4.2
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
|
||||
|
||||
@ -1249,7 +1232,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp")
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp")
|
||||
@ -1289,10 +1271,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
|
||||
endif
|
||||
endif
|
||||
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
||||
@ -1403,11 +1381,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
|
||||
KOKKOS_TPL_LIBRARY_NAMES += hpx
|
||||
endif
|
||||
|
||||
# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning.
|
||||
ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
|
||||
endif
|
||||
|
||||
# With Cygwin functions such as fdopen and fileno are not defined
|
||||
# when strict ansi is enabled. strict ansi gets enabled with -std=c++14
|
||||
# though. So we hard undefine it here. Not sure if that has any bad side effects
|
||||
@ -1461,6 +1434,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
else
|
||||
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENMP */")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
|
||||
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC")
|
||||
else
|
||||
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */")
|
||||
endif
|
||||
tmp := $(call desul_append_header, "")
|
||||
tmp := $(call desul_append_header, "$H""endif")
|
||||
|
||||
@ -1493,7 +1472,7 @@ include $(KOKKOS_PATH)/Makefile.targets
|
||||
kokkos-clean:
|
||||
rm -f $(KOKKOS_OBJ_LINK) $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a KokkosCore_Config_SetupBackend.hpp \
|
||||
KokkosCore_Config_FwdBackend.hpp KokkosCore_Config_DeclareBackend.hpp KokkosCore_Config_DeclareBackend.tmp \
|
||||
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_PostInclude.hpp KokkosCore_Config_PostInclude.tmp KokkosCore_Config_SetupBackend.tmp
|
||||
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_SetupBackend.tmp
|
||||
|
||||
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
|
||||
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
|
||||
|
||||
@ -20,8 +20,6 @@ Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ta
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
|
||||
Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
|
||||
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
|
||||
@ -30,8 +28,6 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
|
||||
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
|
||||
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
|
||||
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
|
||||
@ -82,8 +78,10 @@ Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
|
||||
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
|
||||
# Threads backend spin-wait object.  The recipe has to compile the same source
# file that is listed as the prerequisite (Threads/Kokkos_Threads_Spinwait.cpp).
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
@ -123,6 +121,3 @@ Kokkos_OpenACC_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC
|
||||
Kokkos_OpenACC_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
|
||||
endif
|
||||
|
||||
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
|
||||
@ -28,7 +28,7 @@ To start learning about Kokkos:
|
||||
|
||||
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
|
||||
|
||||
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
|
||||
For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
|
||||
|
||||
For non-public questions send an email to: *crtrott(at)sandia.gov*
|
||||
|
||||
@ -48,10 +48,10 @@ Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citati
|
||||
|
||||
# License
|
||||
|
||||
[License: BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause)
|
||||
[License: Apache-2.0 WITH LLVM-exception](https://spdx.org/licenses/LLVM-exception.html)
|
||||
|
||||
Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
the U.S. Government retains certain rights in this software.
|
||||
|
||||
The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or
|
||||
[here](https://github.com/kokkos/kokkos/blob/master/LICENSE).
|
||||
The full license statement used in all headers is available [here](https://kokkos.org/kokkos-core-wiki/license.html) or
|
||||
[here](https://github.com/kokkos/kokkos/blob/develop/LICENSE).
|
||||
|
||||
12
lib/kokkos/SECURITY.md
Normal file
12
lib/kokkos/SECURITY.md
Normal file
@ -0,0 +1,12 @@
|
||||
# Reporting Security Issues
|
||||
|
||||
To report a security issue, please email
|
||||
[lebrungrandt@ornl.gov](mailto:lebrungrandt@ornl.gov)
|
||||
and [crtrott@sandia.gov](mailto:crtrott@sandia.gov)
|
||||
with a description of the issue, the steps you took to create the issue,
|
||||
affected versions, and, if known, mitigations for the issue.
|
||||
|
||||
Our vulnerability management team will respond within 5 working days of your
|
||||
email. If the issue is confirmed as a vulnerability, we will open a
|
||||
Security Advisory and acknowledge your contributions as part of it. This project
|
||||
follows a 90-day disclosure timeline.
|
||||
@ -159,7 +159,6 @@ If you don't specify a CUDA build variant in a `packages.yaml` and you build you
|
||||
> spack install superscience
|
||||
````
|
||||
you may end up just getting the default Kokkos (i.e. Serial).
|
||||
Some examples are included in the `config/yaml` folder for common platforms.
|
||||
Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
|
||||
For example, with Kokkos Kernels:
|
||||
````bash
|
||||
|
||||
@ -30,5 +30,5 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
|
||||
|
||||
KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST)
|
||||
KOKKOS_LINK_TPL(kokkoscore PUBLIC ONEDPL)
|
||||
|
||||
@ -849,18 +849,17 @@ class Random_XorShift64 {
|
||||
return drand(end - start) + start;
|
||||
}
|
||||
|
||||
// Marsaglia polar method for drawing a standard normal distributed random
|
||||
// Box-Muller method for drawing a standard normal distributed random
|
||||
// number
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while (S >= 1.0) {
|
||||
U = 2.0 * drand() - 1.0;
|
||||
const double V = 2.0 * drand() - 1.0;
|
||||
S = U * U + V * V;
|
||||
}
|
||||
return U * std::sqrt(-2.0 * std::log(S) / S);
|
||||
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
|
||||
|
||||
const double u = drand();
|
||||
const double v = drand();
|
||||
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
|
||||
const double theta = v * two_pi;
|
||||
return r * Kokkos::cos(theta);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1094,18 +1093,17 @@ class Random_XorShift1024 {
|
||||
return drand(end - start) + start;
|
||||
}
|
||||
|
||||
// Marsaglia polar method for drawing a standard normal distributed random
|
||||
// Box-Muller method for drawing a standard normal distributed random
|
||||
// number
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while (S >= 1.0) {
|
||||
U = 2.0 * drand() - 1.0;
|
||||
const double V = 2.0 * drand() - 1.0;
|
||||
S = U * U + V * V;
|
||||
}
|
||||
return U * std::sqrt(-2.0 * std::log(S) / S);
|
||||
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
|
||||
|
||||
const double u = drand();
|
||||
const double v = drand();
|
||||
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
|
||||
const double theta = v * two_pi;
|
||||
return r * Kokkos::cos(theta);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1545,13 +1543,23 @@ template <class ViewType, class RandomPool, class IndexType = int64_t>
|
||||
void fill_random(ViewType a, RandomPool g,
|
||||
typename ViewType::const_value_type begin,
|
||||
typename ViewType::const_value_type end) {
|
||||
fill_random(typename ViewType::execution_space{}, a, g, begin, end);
|
||||
Kokkos::fence(
|
||||
"fill_random: fence before since no execution space instance provided");
|
||||
typename ViewType::execution_space exec;
|
||||
fill_random(exec, a, g, begin, end);
|
||||
exec.fence(
|
||||
"fill_random: fence after since no execution space instance provided");
|
||||
}
|
||||
|
||||
template <class ViewType, class RandomPool, class IndexType = int64_t>
|
||||
void fill_random(ViewType a, RandomPool g,
|
||||
typename ViewType::const_value_type range) {
|
||||
fill_random(typename ViewType::execution_space{}, a, g, 0, range);
|
||||
Kokkos::fence(
|
||||
"fill_random: fence before since no execution space instance provided");
|
||||
typename ViewType::execution_space exec;
|
||||
fill_random(exec, a, g, 0, range);
|
||||
exec.fence(
|
||||
"fill_random: fence after since no execution space instance provided");
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
|
||||
#include "sorting/Kokkos_BinSortPublicAPI.hpp"
|
||||
#include "sorting/Kokkos_SortPublicAPI.hpp"
|
||||
#include "sorting/Kokkos_SortByKeyPublicAPI.hpp"
|
||||
#include "sorting/Kokkos_NestedSortPublicAPI.hpp"
|
||||
|
||||
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
|
||||
|
||||
@ -35,7 +35,6 @@
|
||||
// following the std classification.
|
||||
|
||||
// modifying ops
|
||||
#include "std_algorithms/Kokkos_Swap.hpp"
|
||||
#include "std_algorithms/Kokkos_IterSwap.hpp"
|
||||
|
||||
// non-modifying sequence
|
||||
|
||||
117
lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp
Normal file
117
lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp
Normal file
@ -0,0 +1,117 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
|
||||
#define KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
|
||||
|
||||
#include "./impl/Kokkos_SortByKeyImpl.hpp"
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Kokkos::Experimental {
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// basic overloads
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
// constraints
|
||||
using KeysType = Kokkos::View<KeysDataType, KeysProperties...>;
|
||||
using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);
|
||||
|
||||
static_assert(SpaceAccessibility<ExecutionSpace,
|
||||
typename KeysType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the keys View argument!");
|
||||
static_assert(
|
||||
SpaceAccessibility<ExecutionSpace,
|
||||
typename ValuesType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the values View argument!");
|
||||
|
||||
static_assert(KeysType::static_extent(0) == 0 ||
|
||||
ValuesType::static_extent(0) == 0 ||
|
||||
KeysType::static_extent(0) == ValuesType::static_extent(0));
|
||||
if (values.size() != keys.size())
|
||||
Kokkos::abort((std::string("values and keys extents must be the same. The "
|
||||
"values extent is ") +
|
||||
std::to_string(values.size()) + ", and the keys extent is " +
|
||||
std::to_string(keys.size()) + ".")
|
||||
.c_str());
|
||||
|
||||
if (keys.extent(0) <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
::Kokkos::Impl::sort_by_key_device_view_without_comparator(exec, keys,
|
||||
values);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// overloads supporting a custom comparator
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
template <class ExecutionSpace, class ComparatorType, class KeysDataType,
|
||||
class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
// constraints
|
||||
using KeysType = Kokkos::View<KeysDataType, KeysProperties...>;
|
||||
using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);
|
||||
|
||||
static_assert(SpaceAccessibility<ExecutionSpace,
|
||||
typename KeysType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the keys View argument!");
|
||||
static_assert(
|
||||
SpaceAccessibility<ExecutionSpace,
|
||||
typename ValuesType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the values View argument!");
|
||||
|
||||
static_assert(KeysType::static_extent(0) == 0 ||
|
||||
ValuesType::static_extent(0) == 0 ||
|
||||
KeysType::static_extent(0) == ValuesType::static_extent(0));
|
||||
if (values.size() != keys.size())
|
||||
Kokkos::abort((std::string("values and keys extents must be the same. The "
|
||||
"values extent is ") +
|
||||
std::to_string(values.size()) + ", and the keys extent is " +
|
||||
std::to_string(keys.size()) + ".")
|
||||
.c_str());
|
||||
|
||||
if (keys.extent(0) <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
::Kokkos::Impl::sort_by_key_device_view_with_comparator(exec, keys, values,
|
||||
comparator);
|
||||
}
|
||||
|
||||
} // namespace Kokkos::Experimental
|
||||
#endif
|
||||
@ -29,7 +29,7 @@ namespace Kokkos {
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
template <class ExecutionSpace, class DataType, class... Properties>
|
||||
void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
void sort(const ExecutionSpace& exec,
|
||||
const Kokkos::View<DataType, Properties...>& view) {
|
||||
// constraints
|
||||
using ViewType = Kokkos::View<DataType, Properties...>;
|
||||
@ -52,6 +52,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
}
|
||||
|
||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||
exec.fence("Kokkos::sort without comparator use std::sort");
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last);
|
||||
@ -82,7 +83,7 @@ void sort(const Kokkos::View<DataType, Properties...>& view) {
|
||||
// ---------------------------------------------------------------
|
||||
template <class ExecutionSpace, class ComparatorType, class DataType,
|
||||
class... Properties>
|
||||
void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
void sort(const ExecutionSpace& exec,
|
||||
const Kokkos::View<DataType, Properties...>& view,
|
||||
const ComparatorType& comparator) {
|
||||
// constraints
|
||||
@ -105,6 +106,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
}
|
||||
|
||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||
exec.fence("Kokkos::sort with comparator use std::sort");
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last, comparator);
|
||||
|
||||
@ -18,7 +18,6 @@
|
||||
#define KOKKOS_NESTED_SORT_IMPL_HPP_
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <std_algorithms/Kokkos_Swap.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
@ -99,7 +98,7 @@ KOKKOS_INLINE_FUNCTION void sort_nested_impl(
|
||||
keyView(elem1) = key2;
|
||||
keyView(elem2) = key1;
|
||||
if constexpr (!std::is_same_v<ValueViewType, std::nullptr_t>) {
|
||||
Kokkos::Experimental::swap(valueView(elem1), valueView(elem2));
|
||||
Kokkos::kokkos_swap(valueView(elem1), valueView(elem2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
401
lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp
Normal file
401
lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp
Normal file
@ -0,0 +1,401 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
|
||||
#define KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
|
||||
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
|
||||
// .target sm_70 and higher from PTX ISA version 6.4`.
|
||||
// Also see https://github.com/NVIDIA/cub/pull/170.
|
||||
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
|
||||
#define CUB_USE_COOPERATIVE_GROUPS
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
|
||||
#if defined(KOKKOS_COMPILER_CLANG)
|
||||
// Some versions of Clang fail to compile Thrust, failing with errors like
|
||||
// this:
|
||||
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
|
||||
// error: use of undeclared identifier 'va_printf'
|
||||
// The exact combination of versions for Clang and Thrust (or CUDA) for this
|
||||
// failure was not investigated, however even very recent version combination
|
||||
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
|
||||
//
|
||||
// Defining _CubLog here locally allows us to avoid that code path, however
|
||||
// disabling some debugging diagnostics
|
||||
#pragma push_macro("_CubLog")
|
||||
#ifdef _CubLog
|
||||
#undef _CubLog
|
||||
#endif
|
||||
#define _CubLog
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#pragma pop_macro("_CubLog")
|
||||
#else
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL) && \
|
||||
(ONEDPL_VERSION_MAJOR > 2022 || \
|
||||
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2))
|
||||
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
#include <oneapi/dpl/execution>
|
||||
#include <oneapi/dpl/algorithm>
|
||||
#endif
|
||||
|
||||
namespace Kokkos::Impl {
|
||||
|
||||
template <typename T>
|
||||
constexpr inline bool is_admissible_to_kokkos_sort_by_key =
|
||||
::Kokkos::is_view<T>::value&& T::rank() == 1 &&
|
||||
(std::is_same<typename T::traits::array_layout,
|
||||
Kokkos::LayoutLeft>::value ||
|
||||
std::is_same<typename T::traits::array_layout,
|
||||
Kokkos::LayoutRight>::value ||
|
||||
std::is_same<typename T::traits::array_layout,
|
||||
Kokkos::LayoutStride>::value);
|
||||
|
||||
template <class ViewType>
|
||||
KOKKOS_INLINE_FUNCTION constexpr void
|
||||
static_assert_is_admissible_to_kokkos_sort_by_key(const ViewType& /* view */) {
|
||||
static_assert(is_admissible_to_kokkos_sort_by_key<ViewType>,
|
||||
"Kokkos::sort_by_key only accepts 1D values View with "
|
||||
"LayoutRight, LayoutLeft or LayoutStride.");
|
||||
}
|
||||
|
||||
// For the fallback implementation for sort_by_key using Kokkos::sort, we need
|
||||
// to consider if Kokkos::sort defers to the fallback implementation that copies
|
||||
// the array to the host and uses std::sort, see
|
||||
// copy_to_host_run_stdsort_copy_back() in impl/Kokkos_SortImpl.hpp. If
|
||||
// sort_on_device_v is true, we assume that Kokkos::sort doesn't copy data.
|
||||
// Otherwise, we manually copy all data to the host and provide Kokkos::sort
|
||||
// with a host execution space.
|
||||
// Primary template: defaults to false, which makes sort_by_key_via_sort take
// its host-copy branch unless a specialization says otherwise.
template <class ExecutionSpace, class Layout>
|
||||
inline constexpr bool sort_on_device_v = false;
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
// Specialization for Kokkos::Cuda: true for every layout.
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::Cuda, Layout> = true;
|
||||
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties, class... MaybeComparator>
|
||||
void sort_by_key_cudathrust(
|
||||
const Kokkos::Cuda& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
const auto policy = thrust::cuda::par.on(exec.cuda_stream());
|
||||
auto keys_first = ::Kokkos::Experimental::begin(keys);
|
||||
auto keys_last = ::Kokkos::Experimental::end(keys);
|
||||
auto values_first = ::Kokkos::Experimental::begin(values);
|
||||
thrust::sort_by_key(policy, keys_first, keys_last, values_first,
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
// Specialization for Kokkos::HIP (only compiled with rocThrust enabled): true
// for every layout.
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::HIP, Layout> = true;
|
||||
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties, class... MaybeComparator>
|
||||
void sort_by_key_rocthrust(
|
||||
const Kokkos::HIP& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
const auto policy = thrust::hip::par.on(exec.hip_stream());
|
||||
auto keys_first = ::Kokkos::Experimental::begin(keys);
|
||||
auto keys_last = ::Kokkos::Experimental::end(keys);
|
||||
auto values_first = ::Kokkos::Experimental::begin(values);
|
||||
thrust::sort_by_key(policy, keys_first, keys_last, values_first,
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
// Specialization for SYCL: true only for LayoutLeft and LayoutRight; any other
// layout (e.g. LayoutStride) keeps the value false.
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::Experimental::SYCL, Layout> =
|
||||
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<Layout, Kokkos::LayoutRight>;
|
||||
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties, class... MaybeComparator>
|
||||
void sort_by_key_onedpl(
|
||||
const Kokkos::Experimental::SYCL& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
if (keys.stride(0) != 1 && values.stride(0) != 1) {
|
||||
Kokkos::abort(
|
||||
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
|
||||
}
|
||||
|
||||
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
||||
// the data is on the host.
|
||||
auto queue = exec.sycl_queue();
|
||||
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
||||
const int n = keys.extent(0);
|
||||
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <typename ExecutionSpace, typename PermutationView, typename ViewType>
|
||||
void applyPermutation(const ExecutionSpace& space,
|
||||
const PermutationView& permutation,
|
||||
const ViewType& view) {
|
||||
static_assert(std::is_integral<typename PermutationView::value_type>::value);
|
||||
|
||||
auto view_copy = Kokkos::create_mirror(
|
||||
Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{},
|
||||
Kokkos::WithoutInitializing),
|
||||
view);
|
||||
Kokkos::deep_copy(space, view_copy, view);
|
||||
Kokkos::parallel_for(
|
||||
"Kokkos::sort_by_key_via_sort::permute_" + view.label(),
|
||||
Kokkos::RangePolicy<ExecutionSpace>(space, 0, view.extent(0)),
|
||||
KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); });
|
||||
}
|
||||
|
||||
// Generic sort_by_key built on top of Kokkos::sort.
//
// Steps:
//   1. Allocate an index view `permute` of keys.size() unsigned ints and fill
//      it with 0..n-1.
//   2. Sort `permute` with a comparator that compares the keys the indices
//      refer to (operator< by default, or the single optional comparator).
//      - If sort_on_device_v is false, keys and indices are mirrored to
//        HostSpace, sorted on DefaultHostExecutionSpace, and the sorted
//        indices are copied back.
//      - Otherwise the sort runs on `exec` directly.
//   3. Apply the resulting permutation to both `keys` and `values`.
//
// At most one comparator may be passed (enforced by the static_assert).
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties,
|
||||
class... MaybeComparator>
|
||||
void sort_by_key_via_sort(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
static_assert(sizeof...(MaybeComparator) <= 1);
|
||||
|
||||
auto const n = keys.size();
|
||||
|
||||
Kokkos::View<unsigned int*, ExecutionSpace> permute(
|
||||
Kokkos::view_alloc(exec, Kokkos::WithoutInitializing,
|
||||
"Kokkos::sort_by_key_via_sort::permute"),
|
||||
n);
|
||||
|
||||
// iota
|
||||
Kokkos::parallel_for(
|
||||
"Kokkos::sort_by_key_via_sort::iota",
|
||||
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
|
||||
KOKKOS_LAMBDA(int i) { permute(i) = i; });
|
||||
|
||||
// The layout queried is that of the index view, since that is the view
|
||||
// actually handed to Kokkos::sort below.
using Layout =
|
||||
typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout;
|
||||
if constexpr (!sort_on_device_v<ExecutionSpace, Layout>) {
|
||||
auto host_keys = Kokkos::create_mirror_view(
|
||||
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
|
||||
keys);
|
||||
auto host_permute = Kokkos::create_mirror_view(
|
||||
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
|
||||
permute);
|
||||
Kokkos::deep_copy(exec, host_keys, keys);
|
||||
Kokkos::deep_copy(exec, host_permute, permute);
|
||||
|
||||
// The copies above were submitted to `exec`; fence it before the host
|
||||
// execution space reads the mirrors.
exec.fence("Kokkos::Impl::sort_by_key_via_sort: before host sort");
|
||||
Kokkos::DefaultHostExecutionSpace host_exec;
|
||||
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
host_exec, host_permute,
|
||||
KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); });
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(host_keys(i), host_keys(j));
|
||||
});
|
||||
}
|
||||
host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort");
|
||||
Kokkos::deep_copy(exec, permute, host_permute);
|
||||
} else {
|
||||
#ifdef KOKKOS_ENABLE_SYCL
|
||||
// NOTE(review): the SYCL build indexes the keys through a raw pointer and
|
||||
// stride instead of capturing the View in the comparator; the reason is not
|
||||
// stated here - confirm before unifying with the #else branch.
auto* raw_keys_in_comparator = keys.data();
|
||||
auto stride = keys.stride(0);
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
exec, permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return raw_keys_in_comparator[i * stride] <
|
||||
raw_keys_in_comparator[j * stride];
|
||||
});
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
exec, permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(raw_keys_in_comparator[i * stride],
|
||||
raw_keys_in_comparator[j * stride]);
|
||||
});
|
||||
}
|
||||
#else
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
exec, permute,
|
||||
KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); });
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
exec, permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(keys(i), keys(j));
|
||||
});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// The same permutation is applied to both views so key/value pairs stay
|
||||
// aligned.
applyPermutation(exec, permute, keys);
|
||||
applyPermutation(exec, permute, values);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------
|
||||
//
|
||||
// specialize cases for sorting by key without comparator
|
||||
//
|
||||
// ------------------------------------------------------
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key_device_view_without_comparator(
|
||||
const Kokkos::Cuda& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
sort_by_key_cudathrust(exec, keys, values);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key_device_view_without_comparator(
|
||||
const Kokkos::HIP& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
sort_by_key_rocthrust(exec, keys, values);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key_device_view_without_comparator(
|
||||
const Kokkos::Experimental::SYCL& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||
sort_by_key_onedpl(exec, keys, values);
|
||||
else
|
||||
#endif
|
||||
sort_by_key_via_sort(exec, keys, values);
|
||||
}
|
||||
#endif
|
||||
|
||||
// fallback case
|
||||
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
|
||||
sort_by_key_device_view_without_comparator(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
sort_by_key_via_sort(exec, keys, values);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------
|
||||
//
|
||||
// specialize cases for sorting by key with comparator
|
||||
//
|
||||
// ---------------------------------------------------
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
template <class ComparatorType, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key_device_view_with_comparator(
|
||||
const Kokkos::Cuda& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
sort_by_key_cudathrust(exec, keys, values, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class ComparatorType, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key_device_view_with_comparator(
|
||||
const Kokkos::HIP& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
sort_by_key_rocthrust(exec, keys, values, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class ComparatorType, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key_device_view_with_comparator(
|
||||
const Kokkos::Experimental::SYCL& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||
sort_by_key_onedpl(exec, keys, values, comparator);
|
||||
else
|
||||
#endif
|
||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
// fallback case
|
||||
template <class ComparatorType, class ExecutionSpace, class KeysDataType,
|
||||
class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
|
||||
sort_by_key_device_view_with_comparator(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||
}
|
||||
|
||||
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
|
||||
} // namespace Kokkos::Impl
|
||||
#endif
|
||||
@ -63,6 +63,11 @@
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
#include <oneapi/dpl/execution>
|
||||
#include <oneapi/dpl/algorithm>
|
||||
@ -184,6 +189,26 @@ void sort_cudathrust(const Cuda& space,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class DataType, class... Properties, class... MaybeComparator>
|
||||
void sort_rocthrust(const HIP& space,
|
||||
const Kokkos::View<DataType, Properties...>& view,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
using ViewType = Kokkos::View<DataType, Properties...>;
|
||||
static_assert(ViewType::rank == 1,
|
||||
"Kokkos::sort: currently only supports rank-1 Views.");
|
||||
|
||||
if (view.extent(0) <= 1) {
|
||||
return;
|
||||
}
|
||||
const auto exec = thrust::hip::par.on(space.hip_stream());
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
thrust::sort(exec, first, last,
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class DataType, class... Properties, class... MaybeComparator>
|
||||
void sort_onedpl(const Kokkos::Experimental::SYCL& space,
|
||||
@ -274,6 +299,14 @@ void sort_device_view_without_comparator(
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class DataType, class... Properties>
|
||||
void sort_device_view_without_comparator(
|
||||
const HIP& exec, const Kokkos::View<DataType, Properties...>& view) {
|
||||
sort_rocthrust(exec, view);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class DataType, class... Properties>
|
||||
void sort_device_view_without_comparator(
|
||||
@ -320,6 +353,15 @@ void sort_device_view_with_comparator(
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class ComparatorType, class DataType, class... Properties>
|
||||
void sort_device_view_with_comparator(
|
||||
const HIP& exec, const Kokkos::View<DataType, Properties...>& view,
|
||||
const ComparatorType& comparator) {
|
||||
sort_rocthrust(exec, view, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class ComparatorType, class DataType, class... Properties>
|
||||
void sort_device_view_with_comparator(
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -66,7 +66,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_backward(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -65,7 +65,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_backward(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy_backward(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -54,7 +54,8 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_if(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
Predicate pred) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -69,7 +70,8 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_if(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
Predicate pred) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -96,7 +98,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy_if(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_n(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -66,7 +66,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_n(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy_n(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -80,7 +80,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -96,7 +96,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -111,7 +111,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
BinaryPredicateType predicate) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
@ -128,7 +128,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
BinaryPredicateType predicate) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
@ -227,7 +227,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION bool equal(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -243,7 +243,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION bool equal(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
BinaryPredicateType predicate) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -19,7 +19,6 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include "impl/Kokkos_Constraints.hpp"
|
||||
#include "Kokkos_Swap.hpp"
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
@ -33,7 +32,7 @@ struct StdIterSwapFunctor {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(int i) const {
|
||||
(void)i;
|
||||
::Kokkos::Experimental::swap(*m_a, *m_b);
|
||||
::Kokkos::kokkos_swap(*m_a, *m_b);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
@ -58,6 +57,16 @@ void iter_swap(IteratorType1 a, IteratorType2 b) {
|
||||
Impl::iter_swap_impl(a, b);
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
template <class T>
|
||||
KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::kokkos_swap instead!")
|
||||
KOKKOS_FUNCTION
|
||||
void swap(T& a, T& b) noexcept(::Kokkos::kokkos_swap(std::declval<T&>(),
|
||||
std::declval<T&>())) {
|
||||
::Kokkos::kokkos_swap(a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
|
||||
@ -54,7 +54,7 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -71,7 +71,7 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -112,7 +112,8 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
ComparatorType comp) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -129,7 +130,8 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
ComparatorType comp) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -161,7 +163,7 @@ template <class TeamHandleType, class DataType1, class... Properties1,
|
||||
KOKKOS_FUNCTION bool lexicographical_compare(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -187,7 +189,8 @@ template <class TeamHandleType, class DataType1, class... Properties1,
|
||||
KOKKOS_FUNCTION bool lexicographical_compare(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
ComparatorType comp) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -64,7 +64,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto move(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -41,7 +41,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move_backward(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -65,7 +65,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move_backward(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto move_backward(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto reverse_copy(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -65,7 +65,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto reverse_copy(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto reverse_copy(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -40,7 +40,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto swap_ranges(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -64,7 +64,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto swap_ranges(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto swap_ranges(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto transform(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
UnaryOperation unary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
@ -73,7 +73,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto transform(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
UnaryOperation unary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
@ -119,7 +119,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
auto transform(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source1,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& source2,
|
||||
::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
const ::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
BinaryOperation binary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
|
||||
@ -137,7 +137,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
auto transform(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source1,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& source2,
|
||||
::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
const ::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
BinaryOperation binary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
|
||||
@ -174,7 +174,8 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto transform(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, UnaryOperation unary_op) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
UnaryOperation unary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -207,7 +208,7 @@ KOKKOS_FUNCTION auto transform(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source1,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& source2,
|
||||
::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
const ::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
BinaryOperation binary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
|
||||
|
||||
@ -47,8 +47,9 @@ struct ExclusiveScanDefaultFunctorForKnownNeutralElement {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, ValueType& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = m_first_from[i];
|
||||
if (final_pass) m_first_dest[i] = update + m_init_value;
|
||||
update += m_first_from[i];
|
||||
update += tmp;
|
||||
}
|
||||
};
|
||||
|
||||
@ -73,6 +74,7 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, value_type& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = value_type{m_first_from[i], false};
|
||||
if (final_pass) {
|
||||
if (i == 0) {
|
||||
m_first_dest[i] = m_init_value;
|
||||
@ -81,7 +83,6 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
const auto tmp = value_type{m_first_from[i], false};
|
||||
this->join(update, tmp);
|
||||
}
|
||||
|
||||
@ -132,6 +133,7 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, value_type& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
|
||||
if (final_pass) {
|
||||
if (i == 0) {
|
||||
// for both ExclusiveScan and TransformExclusiveScan,
|
||||
@ -142,7 +144,6 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
|
||||
this->join(update, tmp);
|
||||
}
|
||||
|
||||
@ -190,6 +191,7 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, ValueType& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
|
||||
if (final_pass) {
|
||||
if (i == 0) {
|
||||
// for both ExclusiveScan and TransformExclusiveScan,
|
||||
@ -200,7 +202,6 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
|
||||
this->join(update, tmp);
|
||||
}
|
||||
|
||||
|
||||
@ -46,15 +46,14 @@ struct StdRemoveIfStage1Functor {
|
||||
void operator()(const IndexType i, IndexType& update,
|
||||
const bool final_pass) const {
|
||||
auto& myval = m_first_from[i];
|
||||
if (final_pass) {
|
||||
if (!m_must_remove(myval)) {
|
||||
|
||||
if (!m_must_remove(myval)) {
|
||||
if (final_pass) {
|
||||
// calling move here is ok because we are inside final pass
|
||||
// we are calling move assign as specified by the std
|
||||
m_first_dest[update] = std::move(myval);
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_must_remove(myval)) {
|
||||
update += 1;
|
||||
}
|
||||
}
|
||||
@ -108,7 +107,9 @@ IteratorType remove_if_exespace_impl(const std::string& label,
|
||||
// create helper tmp view
|
||||
using value_type = typename IteratorType::value_type;
|
||||
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
|
||||
tmp_view_type tmp_view("std_remove_if_tmp_view", keep_count);
|
||||
tmp_view_type tmp_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, ex,
|
||||
"std_remove_if_tmp_view"),
|
||||
keep_count);
|
||||
using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
|
||||
|
||||
// in stage 1, *move* all elements to keep from original range to tmp
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
#include "Kokkos_Constraints.hpp"
|
||||
#include "Kokkos_HelperPredicates.hpp"
|
||||
#include <std_algorithms/Kokkos_Distance.hpp>
|
||||
#include <std_algorithms/Kokkos_Swap.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace Kokkos {
|
||||
@ -39,7 +38,7 @@ struct StdReverseFunctor {
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(index_type i) const {
|
||||
::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]);
|
||||
::Kokkos::kokkos_swap(m_first[i], m_last[-i - 1]);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
|
||||
@ -126,10 +126,11 @@ KOKKOS_FUNCTION IteratorType shift_left_team_impl(
|
||||
// execution space impl because for this team impl we are
|
||||
// within a parallel region, so for now we solve serially
|
||||
|
||||
const std::size_t numElementsToMove =
|
||||
using difference_type = typename IteratorType::difference_type;
|
||||
const difference_type numElementsToMove =
|
||||
::Kokkos::Experimental::distance(first + n, last);
|
||||
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
|
||||
for (std::size_t i = 0; i < numElementsToMove; ++i) {
|
||||
for (difference_type i = 0; i < numElementsToMove; ++i) {
|
||||
first[i] = std::move(first[i + n]);
|
||||
}
|
||||
});
|
||||
|
||||
@ -103,26 +103,6 @@ IteratorType shift_right_exespace_impl(
|
||||
return first + n;
|
||||
}
|
||||
|
||||
// Functor that stores an iterator pair (m_first, m_last) and a shift amount
// (m_shift), all set by the constructor at the bottom of the struct.
// NOTE(review): as shown here, operator()() contains only comments and no
// statements, so invoking the functor does nothing.
template <class Iterator>
|
||||
struct StdShiftRightTeamSingleFunctor {
|
||||
Iterator m_first;
|
||||
Iterator m_last;
|
||||
std::size_t m_shift;
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
// Call operator: the body below is comment-only (no executable statements).
void operator()() const {
|
||||
// the impl function calling this functor guarantees that
|
||||
// - m_shift is non-negative
|
||||
// - m_first, m_last identify a valid range with m_last > m_first
|
||||
// - m_shift is less than m_last - m_first
|
||||
// so I can safely use std::size_t here
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
// Constructor: moves both iterators into the members and stores n as m_shift.
StdShiftRightTeamSingleFunctor(Iterator _first, Iterator _last, std::size_t n)
|
||||
: m_first(std::move(_first)), m_last(std::move(_last)), m_shift(n) {}
|
||||
};
|
||||
|
||||
template <class TeamHandleType, class IteratorType>
|
||||
KOKKOS_FUNCTION IteratorType shift_right_team_impl(
|
||||
const TeamHandleType& teamHandle, IteratorType first, IteratorType last,
|
||||
@ -145,10 +125,11 @@ KOKKOS_FUNCTION IteratorType shift_right_team_impl(
|
||||
// execution space impl because for this team impl we are
|
||||
// within a parallel region, so for now we solve serially
|
||||
|
||||
const std::size_t numElementsToMove =
|
||||
using difference_type = typename IteratorType::difference_type;
|
||||
const difference_type numElementsToMove =
|
||||
::Kokkos::Experimental::distance(first, last - n);
|
||||
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
|
||||
for (std::size_t i = 0; i < numElementsToMove; ++i) {
|
||||
for (difference_type i = 0; i < numElementsToMove; ++i) {
|
||||
last[-i - 1] = std::move(last[-n - i - 1]);
|
||||
}
|
||||
});
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
#include "Kokkos_Constraints.hpp"
|
||||
#include "Kokkos_HelperPredicates.hpp"
|
||||
#include <std_algorithms/Kokkos_Distance.hpp>
|
||||
#include <std_algorithms/Kokkos_Swap.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace Kokkos {
|
||||
@ -36,7 +35,7 @@ struct StdSwapRangesFunctor {
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(index_type i) const {
|
||||
::Kokkos::Experimental::swap(m_first1[i], m_first2[i]);
|
||||
::Kokkos::kokkos_swap(m_first1[i], m_first2[i]);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
|
||||
@ -105,7 +105,9 @@ IteratorType unique_exespace_impl(const std::string& label,
|
||||
// using the same algorithm used for unique_copy but we now move things
|
||||
using value_type = typename IteratorType::value_type;
|
||||
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
|
||||
tmp_view_type tmp_view("std_unique_tmp_view", num_elements_to_explore);
|
||||
tmp_view_type tmp_view(Kokkos::view_alloc(ex, Kokkos::WithoutInitializing,
|
||||
"std_unique_tmp_view"),
|
||||
num_elements_to_explore);
|
||||
|
||||
// scan extent is: num_elements_to_explore - 1
|
||||
// for same reason as the one explained in unique_copy
|
||||
|
||||
@ -25,6 +25,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
set(ALGO_SORT_SOURCES)
|
||||
foreach(SOURCE_Input
|
||||
TestSort
|
||||
TestSortByKey
|
||||
TestSortCustomComp
|
||||
TestBinSortA
|
||||
TestBinSortB
|
||||
@ -57,35 +58,37 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
configure_file(${dir}/dummy.cpp ${file})
|
||||
list(APPEND ALGO_RANDOM_SOURCES ${file})
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_A)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_A)
|
||||
foreach(Name
|
||||
StdReducers
|
||||
StdAlgorithmsConstraints
|
||||
RandomAccessIterator
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_B)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_B)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsMinMaxElementOps
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_C)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_C)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsLexicographicalCompare
|
||||
StdAlgorithmsForEach
|
||||
@ -100,15 +103,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsSearch_n
|
||||
StdAlgorithmsMismatch
|
||||
StdAlgorithmsMoveBackward
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_D)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_D)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsModOps
|
||||
StdAlgorithmsModSeqOps
|
||||
@ -128,15 +131,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsReverse
|
||||
StdAlgorithmsShiftLeft
|
||||
StdAlgorithmsShiftRight
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_E)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_E)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsIsSorted
|
||||
StdAlgorithmsIsSortedUntil
|
||||
@ -149,83 +152,83 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTransformUnaryOp
|
||||
StdAlgorithmsTransformExclusiveScan
|
||||
StdAlgorithmsTransformInclusiveScan
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team Q
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_Q)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team Q
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_Q)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamInclusiveScan
|
||||
StdAlgorithmsTeamTransformInclusiveScan
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team P
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_P)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team P
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_P)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamExclusiveScan
|
||||
StdAlgorithmsTeamTransformExclusiveScan
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team M
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_M)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team M
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_M)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamTransformUnaryOp
|
||||
StdAlgorithmsTeamTransformBinaryOp
|
||||
StdAlgorithmsTeamGenerate
|
||||
StdAlgorithmsTeamGenerate_n
|
||||
StdAlgorithmsTeamSwapRanges
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team L
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_L)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team L
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_L)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamIsSorted
|
||||
StdAlgorithmsTeamIsSortedUntil
|
||||
StdAlgorithmsTeamIsPartitioned
|
||||
StdAlgorithmsTeamPartitionCopy
|
||||
StdAlgorithmsTeamPartitionPoint
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team I
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_I)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team I
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_I)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamUnique
|
||||
StdAlgorithmsTeamAdjacentDifference
|
||||
StdAlgorithmsTeamReduce
|
||||
StdAlgorithmsTeamTransformReduce
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team H
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_H)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team H
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_H)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamCopy
|
||||
StdAlgorithmsTeamCopy_n
|
||||
@ -236,43 +239,43 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamRemoveIf
|
||||
StdAlgorithmsTeamRemoveCopy
|
||||
StdAlgorithmsTeamRemoveCopyIf
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team G
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_G)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team G
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_G)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamMove
|
||||
StdAlgorithmsTeamMoveBackward
|
||||
StdAlgorithmsTeamShiftLeft
|
||||
StdAlgorithmsTeamShiftRight
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team F
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_F)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team F
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_F)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamReverse
|
||||
StdAlgorithmsTeamReverseCopy
|
||||
StdAlgorithmsTeamRotate
|
||||
StdAlgorithmsTeamRotateCopy
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_E)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_E)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamFill
|
||||
StdAlgorithmsTeamFill_n
|
||||
@ -280,28 +283,28 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamReplaceIf
|
||||
StdAlgorithmsTeamReplaceCopy
|
||||
StdAlgorithmsTeamReplaceCopyIf
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_D)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_D)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamMinElement
|
||||
StdAlgorithmsTeamMaxElement
|
||||
StdAlgorithmsTeamMinMaxElement
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_C)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_C)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamFind
|
||||
StdAlgorithmsTeamFindIf
|
||||
@ -310,29 +313,29 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamAnyOf
|
||||
StdAlgorithmsTeamNoneOf
|
||||
StdAlgorithmsTeamSearchN
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_B)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_B)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamEqual
|
||||
StdAlgorithmsTeamSearch
|
||||
StdAlgorithmsTeamFindEnd
|
||||
StdAlgorithmsTeamFindFirstOf
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_A)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_A)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamAdjacentFind
|
||||
StdAlgorithmsTeamCount
|
||||
@ -341,11 +344,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamForEachN
|
||||
StdAlgorithmsTeamLexicographicalCompare
|
||||
StdAlgorithmsTeamMismatch
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
endif()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# FIXME_OPENMPTARGET - remove sort test as it leads to ICE with clang/16 and above at compile time.
|
||||
|
||||
@ -27,13 +27,13 @@ TARGETS =
|
||||
|
||||
tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
|
||||
$(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\
|
||||
$(shell echo "\#include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestRandom.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestBinSortA.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestBinSortB.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestNestedSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestRandom.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestBinSortA.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestBinSortB.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestNestedSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
|
||||
) \
|
||||
)
|
||||
|
||||
|
||||
241
lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp
Normal file
241
lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp
Normal file
@ -0,0 +1,241 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
|
||||
#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
|
||||
|
||||
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_Sort.hpp>

#include <utility>  // pair
#include <vector>
|
||||
|
||||
namespace Test {
|
||||
namespace SortImpl {
|
||||
|
||||
struct Less {
|
||||
template <class ValueType>
|
||||
KOKKOS_INLINE_FUNCTION bool operator()(const ValueType &lhs,
|
||||
const ValueType &rhs) const {
|
||||
return lhs < rhs;
|
||||
}
|
||||
};
|
||||
|
||||
struct Greater {
|
||||
template <class ValueType>
|
||||
KOKKOS_INLINE_FUNCTION bool operator()(const ValueType &lhs,
|
||||
const ValueType &rhs) const {
|
||||
return lhs > rhs;
|
||||
}
|
||||
};
|
||||
|
||||
template <class ExecutionSpace, class Keys, class Permute,
|
||||
class Comparator = Less>
|
||||
struct is_sorted_by_key_struct {
|
||||
Keys keys;
|
||||
Keys keys_orig;
|
||||
Permute permute;
|
||||
Comparator comparator;
|
||||
|
||||
is_sorted_by_key_struct(Keys keys_, Keys keys_orig_, Permute permute_,
|
||||
Comparator comparator_ = Comparator{})
|
||||
: keys(keys_),
|
||||
keys_orig(keys_orig_),
|
||||
permute(permute_),
|
||||
comparator(comparator_) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i, unsigned int &count) const {
|
||||
if (i < keys.extent_int(0) - 1 && comparator(keys(i + 1), keys(i))) ++count;
|
||||
if (keys(i) != keys_orig(permute(i))) ++count;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ExecutionSpace, typename ViewType>
|
||||
void iota(ExecutionSpace const &space, ViewType const &v,
|
||||
typename ViewType::value_type value = 0) {
|
||||
using ValueType = typename ViewType::value_type;
|
||||
Kokkos::parallel_for(
|
||||
"ArborX::Algorithms::iota",
|
||||
Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)),
|
||||
KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; });
|
||||
}
|
||||
|
||||
} // namespace SortImpl
|
||||
|
||||
// sort_by_key must accept zero-length key and value views without throwing.
TEST(TEST_CATEGORY, SortByKeyEmptyView) {
  using ExecutionSpace = TEST_EXECSPACE;
  using KeyView        = Kokkos::View<int *, ExecutionSpace>;
  using ValueView      = Kokkos::View<float *, ExecutionSpace>;

  // The element types are arbitrary here; int and float are just examples.
  KeyView keys("keys", 0);
  ValueView values("values", 0);

  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
}
|
||||
|
||||
// Sorts several small key sets with the default ordering, carrying an
// identity permutation along as values, then checks on the execution space
// that the keys are ordered and that permute maps each sorted key back to its
// original position.
TEST(TEST_CATEGORY, SortByKey) {
  using ExecutionSpace = TEST_EXECSPACE;
  using MemorySpace    = typename ExecutionSpace::memory_space;
  using HostInputView =
      Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;

  ExecutionSpace space{};

  // Each vector is taken by value: the unmanaged host view below needs a
  // non-const int pointer, which a const initializer_list element cannot
  // provide.
  for (auto keys_vector : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
                           std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
                           std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
                           std::vector<int>{15, 5, 11, 3, 4, 8}}) {
    auto const n = keys_vector.size();

    auto keys = Kokkos::create_mirror_view_and_copy(
        MemorySpace{}, HostInputView(keys_vector.data(), n));

    // Snapshot of the unsorted keys for the permutation check.
    auto keys_orig = Kokkos::create_mirror(space, keys);
    Kokkos::deep_copy(space, keys_orig, keys);

    Kokkos::View<int *, ExecutionSpace> permute("permute", n);
    SortImpl::iota(space, permute);

    Kokkos::Experimental::sort_by_key(space, keys, permute);

    using Checker =
        SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
                                          decltype(permute)>;
    unsigned int sort_fails = 0;
    Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                            Checker(keys, keys_orig, permute), sort_fails);

    ASSERT_EQ(sort_fails, 0u);
  }
}
|
||||
|
||||
// Same scenario as the default-ordering test, but sorting and verification
// both use SortImpl::Greater as the comparator.
TEST(TEST_CATEGORY, SortByKeyWithComparator) {
  using ExecutionSpace = TEST_EXECSPACE;
  using MemorySpace    = typename ExecutionSpace::memory_space;
  using HostInputView =
      Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;

  ExecutionSpace space{};

  SortImpl::Greater comparator;

  // Each vector is taken by value: the unmanaged host view below needs a
  // non-const int pointer, which a const initializer_list element cannot
  // provide.
  for (auto keys_vector : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
                           std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
                           std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
                           std::vector<int>{15, 5, 11, 3, 4, 8}}) {
    auto const n = keys_vector.size();

    auto keys = Kokkos::create_mirror_view_and_copy(
        MemorySpace{}, HostInputView(keys_vector.data(), n));

    // Snapshot of the unsorted keys for the permutation check.
    auto keys_orig = Kokkos::create_mirror(space, keys);
    Kokkos::deep_copy(space, keys_orig, keys);

    Kokkos::View<int *, ExecutionSpace> permute("permute", n);
    SortImpl::iota(space, permute);

    Kokkos::Experimental::sort_by_key(space, keys, permute, comparator);

    using Checker =
        SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
                                          decltype(permute), SortImpl::Greater>;
    unsigned int sort_fails = 0;
    Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                            Checker(keys, keys_orig, permute, comparator),
                            sort_fails);

    ASSERT_EQ(sort_fails, 0u);
  }
}
|
||||
|
||||
// sort_by_key must accept keys with a compile-time extent, paired with values
// of either compile-time or run-time extent, without throwing.
TEST(TEST_CATEGORY, SortByKeyStaticExtents) {
  using ExecutionSpace = TEST_EXECSPACE;
  using StaticView     = Kokkos::View<int[10], ExecutionSpace>;
  using DynamicView    = Kokkos::View<int *, ExecutionSpace>;

  ExecutionSpace space{};

  StaticView keys("keys");

  // static keys + static values
  StaticView values_static("values_static");
  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(space, keys, values_static));

  // static keys + dynamic values of the same length
  DynamicView values_dynamic("values_dynamic", 10);
  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(space, keys, values_dynamic));
}
|
||||
|
||||
template <typename ExecutionSpace, typename Keys, typename Values>
|
||||
void buildViewsForStrided(ExecutionSpace const &space, int n, Keys &keys,
|
||||
Values &values) {
|
||||
Kokkos::parallel_for(
|
||||
"create_data",
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3>, ExecutionSpace>(space, {0, 0, 0},
|
||||
{n, n, n}),
|
||||
KOKKOS_LAMBDA(int i, int j, int k) {
|
||||
keys(i, j, k) = n - i;
|
||||
values(i, j, k) = j;
|
||||
});
|
||||
}
|
||||
|
||||
// Runs sort_by_key on rank-1 subviews sliced out of rank-3 views along
// different dimensions, then verifies the result with the same checker used
// for contiguous views.
TEST(TEST_CATEGORY, SortByKeyWithStrides) {
  using ExecutionSpace = TEST_EXECSPACE;
  using View3D         = Kokkos::View<int ***, ExecutionSpace>;

  ExecutionSpace space{};

  auto const n = 10;

  View3D keys("keys", n, n, n);
  View3D values("values", n, n, n);
  buildViewsForStrided(space, n, keys, values);

  // keys_sub varies along the first dimension, values_sub along the second.
  auto keys_sub   = Kokkos::subview(keys, Kokkos::ALL(), 1, 2);
  auto values_sub = Kokkos::subview(values, 4, Kokkos::ALL(), 6);

  // Snapshot of the unsorted keys for the permutation check.
  auto keys_orig = Kokkos::create_mirror(space, keys_sub);
  Kokkos::deep_copy(space, keys_orig, keys_sub);

  Kokkos::Experimental::sort_by_key(space, keys_sub, values_sub);

  using Checker =
      SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys_sub),
                                        decltype(values_sub)>;
  unsigned int sort_fails = 0;
  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                          Checker(keys_sub, keys_orig, values_sub),
                          sort_fails);

  ASSERT_EQ(sort_fails, 0u);
}
|
||||
|
||||
// sort_by_key is expected to terminate the process with the message
// "values and keys extents must be the same" when the keys view is longer
// than the values view, both with and without an explicit comparator.
TEST(TEST_CATEGORY, SortByKeyKeysLargerThanValues) {
  // The default "fast" death-test style forks the running process. Use the
  // "threadsafe" style instead, which re-executes the test binary for the
  // death-test child, because the enabled backend may already have threads
  // running at this point.
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";

  using ExecutionSpace = TEST_EXECSPACE;

  // does not matter if we use int or something else
  Kokkos::View<int *, ExecutionSpace> keys("keys", 3);
  Kokkos::View<float *, ExecutionSpace> values("values", 1);

  ASSERT_DEATH(
      Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values),
      "values and keys extents must be the same");
  ASSERT_DEATH(Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values,
                                                 SortImpl::Greater{}),
               "values and keys extents must be the same");
}
|
||||
|
||||
} // namespace Test
|
||||
#endif
|
||||
@ -239,16 +239,8 @@ KOKKOS_FUNCTION bool team_members_have_matching_result(
|
||||
// set accum to 1 if a mismatch is found
|
||||
const bool mismatch = memberValue != target;
|
||||
int accum = static_cast<int>(mismatch);
|
||||
// FIXME_OPENMPTARGET: team API does not meet the TeamHandle concept and
|
||||
// ignores the reducer passed
|
||||
#if defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
Kokkos::Sum<int> dummyReducer(accum);
|
||||
const auto result = teamHandle.team_reduce(accum, dummyReducer);
|
||||
return (result == 0);
|
||||
#else
|
||||
teamHandle.team_reduce(Kokkos::Sum<int>(accum));
|
||||
return (accum == 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class ValueType1, class ValueType2>
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -132,47 +133,6 @@ void my_host_exclusive_scan(it1 first, it1 last, it2 dest, ValType init,
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
|
||||
void verify_data(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
ValueType init_value, BinaryOp bop) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), init_value, bop);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ValueType>
|
||||
struct MultiplyFunctor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -189,107 +149,153 @@ struct SumFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
struct VerifyData {
|
||||
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
ValueType init_value, BinaryOp bop) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), init_value, bop);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2, class ValueType>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
ValueType init_value) {
|
||||
(*this)(data_view, test_view, init_value, SumFunctor<ValueType>());
|
||||
}
|
||||
};
|
||||
|
||||
// Overload chosen for an int argument; yields the text "int". The argument
// value itself is ignored.
std::string value_type_to_string(int /*unused*/) {
  return std::string{"int"};
}
|
||||
|
||||
// Overload chosen for a double argument; yields the text "double". The
// argument value itself is ignored.
std::string value_type_to_string(double /*unused*/) {
  return std::string{"double"};
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType>
|
||||
void run_single_scenario_default_op(const InfoType& scenario_info,
|
||||
ValueType init_value) {
|
||||
using default_op = SumFunctor<ValueType>;
|
||||
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
|
||||
void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
|
||||
OpOrEmpty... empty_or_op) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "exclusive_scan default op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", "
|
||||
// << "init = " << init_value << std::endl;
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
// view_dest is filled with zeros before calling the algorithm every time to
|
||||
// ensure the algorithm does something meaningful
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value);
|
||||
init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value);
|
||||
init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value);
|
||||
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
|
||||
init_value);
|
||||
init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class BinaryOp>
|
||||
void run_single_scenario_custom_op(const InfoType& scenario_info,
|
||||
ValueType init_value, BinaryOp bop) {
|
||||
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
ValueType init_value,
|
||||
OpOrEmpty... empty_or_op) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "exclusive_scan custom op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", "
|
||||
// << "init = " << init_value << std::endl;
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view1: filled according to what the scenario asks for and is not modified
|
||||
// view2: filled according to what the scenario asks for and used for the
|
||||
// in-place op. Therefore, after the op is done, view2 should contain the
|
||||
// result of doing exclusive scan. NOTE: view2 is filled below every time
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view1 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view1");
|
||||
fill_view(view1, name);
|
||||
|
||||
auto view2 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view2");
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
|
||||
KE::begin(view2), init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view2),
|
||||
KE::cend(view2), KE::begin(view2), init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r =
|
||||
KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan(exespace(), view2, view2, init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
|
||||
init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan("label", exespace(), view2, view2, init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
@ -303,34 +309,39 @@ void run_exclusive_scan_all_scenarios() {
|
||||
{"medium", 1103}, {"large", 10513}};
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{0});
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{1});
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{-2});
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{3});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{0});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2});
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
// custom multiply op is only run for small views otherwise it overflows
|
||||
if (it.first == "small-a" || it.first == "small-b") {
|
||||
using custom_bop_t = MultiplyFunctor<ValueType>;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
|
||||
custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
}
|
||||
|
||||
using custom_bop_t = SumFunctor<ValueType>;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
|
||||
custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -143,51 +144,6 @@ void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop,
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
|
||||
void verify_data(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
BinaryOp bop, Args... args /* copy on purpose */) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), bop, args...);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
|
||||
const auto ext = test_view_h.extent(0);
|
||||
if (ext > 0) {
|
||||
for (std::size_t i = 0; i < ext; ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
}
|
||||
}
|
||||
// std::cout << " last el: " << test_view_h(ext-1) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
template <class ValueType>
|
||||
struct MultiplyFunctor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -204,107 +160,151 @@ struct SumFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
struct VerifyData {
|
||||
template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
BinaryOp bop, Args... args /* copy on purpose */) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), bop, args...);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
|
||||
const auto ext = test_view_h.extent(0);
|
||||
if (ext > 0) {
|
||||
for (std::size_t i = 0; i < ext; ++i) {
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view) // the view to test
|
||||
{
|
||||
using value_type = typename ViewType1::non_const_value_type;
|
||||
(*this)(data_view, test_view, SumFunctor<value_type>());
|
||||
}
|
||||
};
|
||||
|
||||
// Overload chosen for an int argument; yields the text "int". The argument
// value itself is ignored.
std::string value_type_to_string(int /*unused*/) {
  return std::string{"int"};
}
|
||||
// Overload chosen for a double argument; yields the text "double". The
// argument value itself is ignored.
std::string value_type_to_string(double /*unused*/) {
  return std::string{"double"};
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType>
|
||||
void run_single_scenario_default_op(const InfoType& scenario_info) {
|
||||
using default_op = SumFunctor<ValueType>;
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario(const InfoType& scenario_info,
|
||||
Args... args /* copy on purpose */) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "inclusive_scan default op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << std::endl;
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
// view_dest is filled with zeros before calling the algorithm every time to
|
||||
// ensure the algorithm does something meaningful
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest));
|
||||
auto r =
|
||||
KE::inclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest));
|
||||
auto r =
|
||||
KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), view_from, view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest);
|
||||
auto r =
|
||||
KE::inclusive_scan("label", exespace(), view_from, view_dest, args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class BinaryOp,
|
||||
class... Args>
|
||||
void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop,
|
||||
Args... args /* copy on purpose */) {
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
Args... args /* copy on purpose */) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
|
||||
// if (1 == sizeof...(Args)) {
|
||||
// std::cout << "inclusive_scan custom op and init value: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", " << std::endl;
|
||||
// } else {
|
||||
// std::cout << "inclusive_scan custom op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", " << std::endl;
|
||||
// }
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view1: filled according to what the scenario asks for and is not modified
|
||||
// view2: filled according to what the scenario asks for and used for the
|
||||
// in-place op. Therefore, after the op is done, view2 should contain the
|
||||
// result of doing inclusive scan. NOTE: view2 is filled below every time
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
auto view1 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view1");
|
||||
fill_view(view1, name);
|
||||
|
||||
auto view2 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view2");
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), bop,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
|
||||
KE::begin(view2), args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), bop,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view2),
|
||||
KE::cend(view2), KE::begin(view2), args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan(exespace(), view2, view2, args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan("label", exespace(), view2, view2, args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
@ -318,27 +318,35 @@ void run_inclusive_scan_all_scenarios() {
|
||||
{"medium-a", 313}, {"medium-b", 1103}, {"large", 10513}};
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario_default_op<Tag, ValueType>(it);
|
||||
run_single_scenario<Tag, ValueType>(it);
|
||||
run_single_scenario_inplace<Tag, ValueType>(it);
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
// the sum custom op is always run
|
||||
using sum_binary_op = SumFunctor<ValueType>;
|
||||
sum_binary_op sbop;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop);
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{0});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{1});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{-2});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{3});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop);
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{0});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{1});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{-2});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{3});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{-2});
|
||||
|
||||
// custom multiply only for small views to avoid overflows
|
||||
if (it.first == "small-a" || it.first == "small-b") {
|
||||
using mult_binary_op = MultiplyFunctor<ValueType>;
|
||||
mult_binary_op mbop;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop);
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{0});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{1});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{-2});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{3});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop);
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{0});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{1});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{-2});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{3});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, mbop);
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{-2});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -146,7 +146,7 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
resultsA[3] = KE::is_sorted("label", exespace(), view);
|
||||
const auto allA = std::all_of(resultsA.cbegin(), resultsA.cend(),
|
||||
[=](bool v) { return v == gold; });
|
||||
EXPECT_TRUE(allA);
|
||||
EXPECT_TRUE(allA) << name << ", " << view_tag_to_string(Tag{});
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
CustomLessThanComparator<ValueType, ValueType> comp;
|
||||
@ -159,7 +159,7 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
resultsB[3] = KE::is_sorted("label", exespace(), view, comp);
|
||||
const auto allB = std::all_of(resultsB.cbegin(), resultsB.cend(),
|
||||
[=](bool v) { return v == gold; });
|
||||
EXPECT_TRUE(allB);
|
||||
EXPECT_TRUE(allB) << name << ", " << view_tag_to_string(Tag{});
|
||||
#endif
|
||||
|
||||
Kokkos::fence();
|
||||
@ -173,9 +173,6 @@ void run_is_sorted_all_scenarios() {
|
||||
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
|
||||
{"large-b", 101513}};
|
||||
|
||||
std::cout << "is_sorted: " << view_tag_to_string(Tag{})
|
||||
<< ", all overloads \n";
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario<Tag, ValueType>(it);
|
||||
}
|
||||
|
||||
@ -145,10 +145,10 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view));
|
||||
auto r3 = KE::is_sorted_until(exespace(), view);
|
||||
auto r4 = KE::is_sorted_until("label", exespace(), view);
|
||||
ASSERT_EQ(r1, gold);
|
||||
ASSERT_EQ(r2, gold);
|
||||
ASSERT_EQ(r3, gold);
|
||||
ASSERT_EQ(r4, gold);
|
||||
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
CustomLessThanComparator<ValueType, ValueType> comp;
|
||||
@ -160,10 +160,10 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
auto r8 = KE::is_sorted_until("label", exespace(), view, comp);
|
||||
#endif
|
||||
|
||||
ASSERT_EQ(r1, gold);
|
||||
ASSERT_EQ(r2, gold);
|
||||
ASSERT_EQ(r3, gold);
|
||||
ASSERT_EQ(r4, gold);
|
||||
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
@ -176,9 +176,6 @@ void run_is_sorted_until_all_scenarios() {
|
||||
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
|
||||
{"large-b", 101513}};
|
||||
|
||||
std::cout << "is_sorted_until: " << view_tag_to_string(Tag{})
|
||||
<< ", all overloads \n";
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario<Tag, ValueType>(it);
|
||||
}
|
||||
|
||||
@ -48,7 +48,7 @@ struct MyMovableType {
|
||||
TEST(std_algorithms_mod_ops_test, move) {
|
||||
MyMovableType a;
|
||||
using move_t = decltype(std::move(a));
|
||||
static_assert(std::is_rvalue_reference<move_t>::value, "");
|
||||
static_assert(std::is_rvalue_reference<move_t>::value);
|
||||
|
||||
// move constr
|
||||
MyMovableType b(std::move(a));
|
||||
@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove {
|
||||
void operator()(const int index) const {
|
||||
typename ViewType::value_type a{11};
|
||||
using move_t = decltype(std::move(a));
|
||||
static_assert(std::is_rvalue_reference<move_t>::value, "");
|
||||
static_assert(std::is_rvalue_reference<move_t>::value);
|
||||
m_view(index) = std::move(a);
|
||||
}
|
||||
|
||||
@ -89,50 +89,6 @@ TEST(std_algorithms_mod_ops_test, move_within_parfor) {
|
||||
}
|
||||
}
|
||||
|
||||
// ------------
|
||||
// swap
|
||||
// ------------
|
||||
// Checks KE::swap on built-in arithmetic types: after the call the two
// operands must have traded values.
TEST(std_algorithms_mod_ops_test, swap) {
  // integral operands, compared exactly
  {
    int lhs{1};
    int rhs{2};
    KE::swap(lhs, rhs);
    ASSERT_EQ(lhs, 2);
    ASSERT_EQ(rhs, 1);
  }

  // floating-point operands, compared with EXPECT_DOUBLE_EQ
  {
    double lhs{3.};
    double rhs{1.};
    KE::swap(lhs, rhs);
    EXPECT_DOUBLE_EQ(lhs, 1.);
    EXPECT_DOUBLE_EQ(rhs, 3.);
  }
}
|
||||
|
||||
template <class ViewType>
|
||||
struct StdAlgoModSeqOpsTestSwap {
|
||||
ViewType m_view;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int index) const {
|
||||
typename ViewType::value_type newval{11};
|
||||
KE::swap(m_view(index), newval);
|
||||
}
|
||||
|
||||
StdAlgoModSeqOpsTestSwap(ViewType aIn) : m_view(aIn) {}
|
||||
};
|
||||
|
||||
// Runs StdAlgoModSeqOpsTestSwap over a 10-element view inside a parallel_for
// and then checks, on a host copy, that every entry now holds 11.
TEST(std_algorithms_mod_ops_test, swap_within_parfor) {
  auto a = create_view<double>(stdalgos::DynamicTag{}, 10, "a");
  StdAlgoModSeqOpsTestSwap<decltype(a)> fnc(a);
  Kokkos::parallel_for(a.extent(0), fnc);

  // copy the result to host memory so it can be inspected with gtest macros
  auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
  for (std::size_t i = 0; i < a.extent(0); ++i) {
    // index with i: reading a_h(0) on every iteration would verify only the
    // first element and leave the other entries unchecked
    EXPECT_DOUBLE_EQ(a_h(i), 11.);
  }
}
|
||||
|
||||
// ------------
|
||||
// iter_swap
|
||||
// ------------
|
||||
|
||||
@ -110,11 +110,9 @@ void verify_data(const std::string& name, ResultType my_result,
|
||||
ViewTypeDestFalse view_dest_false, PredType pred) {
|
||||
using value_type = typename ViewTypeFrom::value_type;
|
||||
static_assert(
|
||||
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value,
|
||||
"");
|
||||
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value);
|
||||
static_assert(
|
||||
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value,
|
||||
"");
|
||||
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value);
|
||||
|
||||
const std::size_t ext = view_from.extent(0);
|
||||
|
||||
|
||||
@ -166,6 +166,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_copy_if_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
@ -121,7 +121,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -147,9 +149,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// exclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -168,12 +167,19 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -223,11 +229,16 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
#undef exclusive_scan
|
||||
}
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
@ -236,16 +247,24 @@ void run_all_scenarios() {
|
||||
#else
|
||||
for (int apiId : {0, 1}) {
|
||||
#endif
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point for the team-level exclusive_scan tests. Each layout/value-type
// combination is exercised twice: once with the default (void) third template
// argument and once with the InPlace tag.
TEST(std_algorithms_exclusive_scan_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
  GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif

  // default mode
  run_all_scenarios<DynamicTag, double>();
  run_all_scenarios<StridedTwoRowsTag, int>();
  run_all_scenarios<StridedThreeRowsTag, unsigned>();

  // InPlace mode
  run_all_scenarios<DynamicTag, double, InPlace>();
  run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
  run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
|
||||
|
||||
} // namespace TeamExclusiveScan
|
||||
|
||||
@ -139,7 +139,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -165,9 +167,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// inclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -186,12 +185,20 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -251,25 +258,38 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
#undef inclusive_scan
|
||||
}
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
for (int apiId : {0, 1, 2, 3, 4, 5}) {
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point for the team-level inclusive_scan tests. Each layout/value-type
// combination is exercised twice: once with the default (void) third template
// argument and once with the InPlace tag.
TEST(std_algorithms_inclusive_scan_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
  GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif

  // default mode
  run_all_scenarios<DynamicTag, double>();
  run_all_scenarios<StridedTwoRowsTag, int>();
  run_all_scenarios<StridedThreeRowsTag, unsigned>();

  // InPlace mode
  run_all_scenarios<DynamicTag, double, InPlace>();
  run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
  run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
|
||||
|
||||
} // namespace TeamInclusiveScan
|
||||
|
||||
@ -212,6 +212,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_remove_copy_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
@ -168,6 +168,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_remove_copy_if_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
@ -108,7 +108,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -134,9 +136,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// transform_exclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -156,12 +155,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
|
||||
apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -200,16 +208,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
#undef transform_exclusive_scan
|
||||
}
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
for (int apiId : {0, 1}) {
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -219,6 +232,10 @@ TEST(std_algorithms_transform_exclusive_scan_team_test, test) {
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
run_all_scenarios<DynamicTag, double, InPlace>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
|
||||
}
|
||||
|
||||
} // namespace TeamTransformExclusiveScan
|
||||
|
||||
@ -131,7 +131,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -157,9 +159,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// transform_inclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -179,12 +178,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
|
||||
apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -236,16 +244,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
}
|
||||
#undef transform_inclusive_scan
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
for (int apiId : {0, 1, 2, 3}) {
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -255,6 +268,10 @@ TEST(std_algorithms_transform_inclusive_scan_team_test, test) {
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
run_all_scenarios<DynamicTag, double, InPlace>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
|
||||
}
|
||||
|
||||
} // namespace TeamTransformInclusiveScan
|
||||
|
||||
@ -186,6 +186,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_unique_copy_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, int>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, int>();
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -160,24 +161,15 @@ void verify_data(ViewType1 data_view, // contains data
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error = std::abs(gold_h(i) - test_view_h(i));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
|
||||
// std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -205,17 +197,13 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
|
||||
BinaryOp bop, UnaryOp uop) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "transform_exclusive_scan custom op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", "
|
||||
// << "init = " << init_value << std::endl;
|
||||
|
||||
auto view_dest =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
|
||||
auto view_from =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_exclusive_scan_view_from");
|
||||
fill_view(view_from, name);
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_exclusive_scan_view_dest");
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::transform_exclusive_scan(
|
||||
@ -253,6 +241,65 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class BinaryOp,
|
||||
class UnaryOp>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
ValueType init_value, BinaryOp bop,
|
||||
UnaryOp uop) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view1: filled according to what the scenario asks for and is not modified
|
||||
// view2: filled according to what the scenario asks for and used for the
|
||||
// in-place op Therefore, after the op is done, view2 should contain the
|
||||
// result of doing exclusive scan NOTE: view2 is filled below every time
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view1 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view1");
|
||||
fill_view(view1, name);
|
||||
|
||||
auto view2 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view2");
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan(exespace(), KE::cbegin(view2),
|
||||
KE::cend(view2), KE::begin(view2),
|
||||
init_value, bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan(
|
||||
"label", exespace(), KE::cbegin(view2), KE::cend(view2),
|
||||
KE::begin(view2), init_value, bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan(exespace(), view2, view2, init_value,
|
||||
bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan("label", exespace(), view2, view2,
|
||||
init_value, bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType>
|
||||
void run_all_scenarios() {
|
||||
const std::map<std::string, std::size_t> scenarios = {
|
||||
@ -267,6 +314,11 @@ void run_all_scenarios() {
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1}, bop_t(), uop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, bop_t(), uop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3}, bop_t(), uop_t());
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0}, bop_t(),
|
||||
uop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2}, bop_t(),
|
||||
uop_t());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -172,24 +173,15 @@ void verify_data(ViewType1 data_view, // contains data
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error = std::abs(gold_h(i) - test_view_h(i));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
|
||||
// std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -210,30 +202,11 @@ struct SumBinaryFunctor {
|
||||
std::string value_type_to_string(int) { return "int"; }
|
||||
std::string value_type_to_string(double) { return "double"; }
|
||||
|
||||
template <class Tag, class BopT, class UopT>
|
||||
void print_scenario_details(const std::string& name, BopT bop, UopT uop) {
|
||||
(void)bop;
|
||||
(void)uop;
|
||||
std::cout << "transform_inclusive_scan: " << name << ", "
|
||||
<< view_tag_to_string(Tag{}) << std::endl;
|
||||
}
|
||||
|
||||
template <class Tag, class BopT, class UopT, class ValueType>
|
||||
void print_scenario_details(const std::string& name, BopT bop, UopT uop,
|
||||
ValueType init_value) {
|
||||
(void)bop;
|
||||
(void)uop;
|
||||
std::cout << "transform_inclusive_scan: " << name << ", "
|
||||
<< view_tag_to_string(Tag{}) << ", "
|
||||
<< "init = " << init_value << std::endl;
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario(const InfoType& scenario_info,
|
||||
Args... args /* by value on purpose*/) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// print_scenario_details<Tag>(name, args...);
|
||||
|
||||
auto view_dest =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_inclusive_scan");
|
||||
@ -278,6 +251,63 @@ void run_single_scenario(const InfoType& scenario_info,
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
Args... args /* by value on purpose*/) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view1: filled according to scenario and is not modified
|
||||
// view2: filled according scenario and used for the in-place op
|
||||
// Therefore, after the op is done, view_2 should contain the
|
||||
// result of doing exclusive scan.
|
||||
// NOTE: view2 must be filled before every call to the algorithm
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view_1 = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_inclusive_scan_view_1");
|
||||
fill_view(view_1, name);
|
||||
|
||||
auto view_2 = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_inclusive_scan_view_2");
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_2),
|
||||
KE::cend(view_2), KE::begin(view_2),
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan("label", exespace(),
|
||||
KE::cbegin(view_2), KE::cend(view_2),
|
||||
KE::begin(view_2), args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan(exespace(), view_2, view_2, args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan("label", exespace(), view_2, view_2,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType>
|
||||
void run_all_scenarios() {
|
||||
const std::map<std::string, std::size_t> scenarios = {
|
||||
@ -294,15 +324,23 @@ void run_all_scenarios() {
|
||||
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{2});
|
||||
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-1});
|
||||
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-2});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
|
||||
ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
|
||||
ValueType{2});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
|
||||
ValueType{-2});
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) {
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
// run_all_scenarios<StridedThreeTag, double>();
|
||||
// run_all_scenarios<DynamicTag, int>();
|
||||
// run_all_scenarios<StridedThreeTag, int>();
|
||||
run_all_scenarios<StridedThreeTag, double>();
|
||||
run_all_scenarios<DynamicTag, int>();
|
||||
run_all_scenarios<StridedThreeTag, int>();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -83,9 +83,6 @@ auto run_min_or_max_test(ViewType view, StdReducersTestEnumOrder enValue) {
|
||||
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
|
||||
"test is only enabled for HostSpace");
|
||||
|
||||
std::cout << "checking reduction with order: " << order_to_string(enValue)
|
||||
<< "\n";
|
||||
|
||||
using view_value_type = typename ViewType::value_type;
|
||||
using reducer_type = std::conditional_t<
|
||||
(flag == 0), Kokkos::MaxFirstLoc<view_value_type, IndexType, ExeSpace>,
|
||||
@ -132,18 +129,24 @@ TEST(std_algorithms_reducers, max_first_loc) {
|
||||
|
||||
const auto pair1 = run_min_or_max_test<0, hostspace, index_type>(
|
||||
view_h, StdReducersTestEnumOrder::LeftToRight);
|
||||
ASSERT_EQ(pair1.first, gold_value);
|
||||
ASSERT_EQ(pair1.second, gold_location);
|
||||
ASSERT_EQ(pair1.first, gold_value)
|
||||
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
|
||||
ASSERT_EQ(pair1.second, gold_location)
|
||||
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
|
||||
|
||||
const auto pair2 = run_min_or_max_test<0, hostspace, index_type>(
|
||||
view_h, StdReducersTestEnumOrder::RightToLeft);
|
||||
ASSERT_EQ(pair2.first, gold_value);
|
||||
ASSERT_EQ(pair2.second, gold_location);
|
||||
ASSERT_EQ(pair2.first, gold_value)
|
||||
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
|
||||
ASSERT_EQ(pair2.second, gold_location)
|
||||
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
|
||||
|
||||
const auto pair3 = run_min_or_max_test<0, hostspace, index_type>(
|
||||
view_h, StdReducersTestEnumOrder::Random);
|
||||
ASSERT_EQ(pair3.first, gold_value);
|
||||
ASSERT_EQ(pair3.second, gold_location);
|
||||
ASSERT_EQ(pair3.first, gold_value)
|
||||
<< order_to_string(StdReducersTestEnumOrder::Random);
|
||||
ASSERT_EQ(pair3.second, gold_location)
|
||||
<< order_to_string(StdReducersTestEnumOrder::Random);
|
||||
}
|
||||
|
||||
TEST(std_algorithms_reducers, min_first_loc) {
|
||||
@ -191,9 +194,6 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
|
||||
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
|
||||
"test is only enabled for HostSpace");
|
||||
|
||||
std::cout << "checking reduction with order: " << order_to_string(enValue)
|
||||
<< "\n";
|
||||
|
||||
using view_value_type = typename ViewType::value_type;
|
||||
using reducer_type =
|
||||
Kokkos::MinMaxFirstLastLoc<view_value_type, IndexType, ExeSpace>;
|
||||
@ -212,10 +212,10 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
|
||||
reduction_value_type{view(index), view(index), index, index});
|
||||
}
|
||||
|
||||
ASSERT_EQ(red_result.min_val, gold_values.first);
|
||||
ASSERT_EQ(red_result.max_val, gold_values.second);
|
||||
ASSERT_EQ(red_result.min_loc, gold_locs.first);
|
||||
ASSERT_EQ(red_result.max_loc, gold_locs.second);
|
||||
ASSERT_EQ(red_result.min_val, gold_values.first) << order_to_string(enValue);
|
||||
ASSERT_EQ(red_result.max_val, gold_values.second) << order_to_string(enValue);
|
||||
ASSERT_EQ(red_result.min_loc, gold_locs.first) << order_to_string(enValue);
|
||||
ASSERT_EQ(red_result.max_loc, gold_locs.second) << order_to_string(enValue);
|
||||
}
|
||||
|
||||
TEST(std_algorithms_reducers, min_max_first_last_loc) {
|
||||
|
||||
@ -1 +1,12 @@
|
||||
#FIXME_OPENMPTARGET - compiling in debug mode causes ICE.
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(atomic)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream)
|
||||
|
||||
#FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow.
|
||||
IF(NOT Kokkos_ENABLE_OPENMPTARGET)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(bytes_and_flops)
|
||||
ENDIF()
|
||||
|
||||
4
lib/kokkos/benchmarks/atomic/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/atomic/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
atomic
|
||||
SOURCES main.cpp
|
||||
)
|
||||
4
lib/kokkos/benchmarks/bytes_and_flops/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/bytes_and_flops/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
bytes_and_flops
|
||||
SOURCES bench_double.cpp bench_float.cpp bench_int32_t.cpp bench_int64_t.cpp main.cpp
|
||||
)
|
||||
@ -37,22 +37,22 @@ struct RunStride {
|
||||
};
|
||||
|
||||
#define STRIDE 1
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 2
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 4
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 8
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 16
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 32
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
|
||||
template <class Scalar>
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<double>(int N, int K, int R, int D, int U,
|
||||
int F, int T, int S, int B, int I);
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<float>(int N, int K, int R, int D, int U, int F,
|
||||
int T, int S, int B, int I);
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<int32_t>(int N, int K, int R, int D, int U,
|
||||
int F, int T, int S, int B, int I);
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<int64_t>(int N, int K, int R, int D, int U,
|
||||
int F, int T, int S, int B, int I);
|
||||
|
||||
@ -15,28 +15,28 @@
|
||||
//@HEADER
|
||||
|
||||
#define UNROLL 1
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
|
||||
template <class Scalar>
|
||||
|
||||
@ -26,7 +26,7 @@ struct Run<Scalar, UNROLL, STRIDE> {
|
||||
Kokkos::deep_copy(C, Scalar(3.5));
|
||||
|
||||
Kokkos::Timer timer;
|
||||
for (int i = 0; i < I; ++i) {
|
||||
for (int iter = 0; iter < I; ++iter) {
|
||||
Kokkos::parallel_for(
|
||||
"BenchmarkKernel",
|
||||
Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)),
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
#include <cstdlib>
|
||||
|
||||
extern template void run_stride_unroll<float>(int, int, int, int, int, int, int,
|
||||
@ -86,7 +86,7 @@ int main(int argc, char* argv[]) {
|
||||
printf("D must be one of 1,2,4,8,16,32\n");
|
||||
return 0;
|
||||
}
|
||||
if ((P < 1) && (P > 2)) {
|
||||
if ((P < 1) || (P > 4)) {
|
||||
printf("P must be one of 1,2,3,4\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
4
lib/kokkos/benchmarks/gather/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/gather/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
gather
|
||||
SOURCES main.cpp
|
||||
)
|
||||
@ -20,28 +20,28 @@ struct RunGather {
|
||||
};
|
||||
|
||||
#define UNROLL 1
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
|
||||
template <class Scalar>
|
||||
|
||||
@ -138,7 +138,7 @@ struct RunGather<Scalar, UNROLL> {
|
||||
printf(
|
||||
"SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: "
|
||||
"%lf GGather/s: %lf\n",
|
||||
sizeof(Scalar) / 4, N, K, D, R, UNROLL, F, seconds,
|
||||
static_cast<int>(sizeof(Scalar) / 4), N, K, D, R, UNROLL, F, seconds,
|
||||
1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds,
|
||||
1.e-9 * gather_ops / seconds);
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
#include <gather.hpp>
|
||||
#include "gather.hpp"
|
||||
#include <cstdlib>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
||||
4
lib/kokkos/benchmarks/launch_latency/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/launch_latency/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
launch_latency
|
||||
SOURCES launch_latency.cpp
|
||||
)
|
||||
283
lib/kokkos/benchmarks/launch_latency/launch_latency.cpp
Normal file
283
lib/kokkos/benchmarks/launch_latency/launch_latency.cpp
Normal file
@ -0,0 +1,283 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
/*! \file launch_latency.cpp
|
||||
|
||||
Tests of parallel_for and parallel_reduce latency for different
|
||||
circumstances.
|
||||
|
||||
Three launch kinds are tested: parallel_for, parallel_reduce into scalar,
|
||||
and parallel_reduce into view
|
||||
|
||||
  N controls how large the parallel loop is
|
||||
V controls how large the functor is
|
||||
M controls across how many launches the latency is averaged
|
||||
  K controls how large the nested loop is (no larger than V)
|
||||
|
||||
For each launch kind,
|
||||
1. Avg functor dispatch latency: (time to do M launches) / M
|
||||
2. Avg functor completion throughput: (M launches + sync) / M
|
||||
3. Avg functor completion latency: (M (launch + sync)) / M
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>

#include <charconv>
#include <cstdio>
#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
|
||||
|
||||
template <int V>
|
||||
struct TestFunctor {
|
||||
double values[V];
|
||||
Kokkos::View<double*> a;
|
||||
int K;
|
||||
TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
|
||||
}
|
||||
};
|
||||
|
||||
template <int V>
|
||||
struct TestRFunctor {
|
||||
double values[V];
|
||||
Kokkos::View<double*> a;
|
||||
int K;
|
||||
TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, double& lsum) const {
|
||||
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
|
||||
lsum += a(i);
|
||||
}
|
||||
};
|
||||
|
||||
/// Switches selecting which launch kinds run() measures.
/// Every kind is enabled unless main() clears its flag.
struct Opts {
  bool par_for{true};          // measure parallel_for
  bool par_reduce{true};       // measure parallel_reduce into a scalar
  bool par_reduce_view{true};  // measure parallel_reduce into a view
};
|
||||
|
||||
template <int V>
|
||||
void run(int N, int M, int K, const Opts& opts) {
|
||||
std::string l_no_fence, l_fence, l_red_no_fence, l_red_fence,
|
||||
l_red_view_no_fence, l_red_view_fence;
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunNoFence_" << N << "_" << K << std::endl;
|
||||
l_no_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunFence_" << N << "_" << K << std::endl;
|
||||
l_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceNoFence_" << N << "_" << K << std::endl;
|
||||
l_red_no_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceFence_" << N << "_" << K << std::endl;
|
||||
l_red_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceViewNoFence_" << N << "_" << K << std::endl;
|
||||
l_red_view_no_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceViewFence_" << N << "_" << K << std::endl;
|
||||
l_red_view_fence = ostream.str();
|
||||
}
|
||||
|
||||
double result;
|
||||
Kokkos::View<double*> a("A", N);
|
||||
Kokkos::View<double> v_result("result");
|
||||
TestFunctor<V> f(a, K);
|
||||
TestRFunctor<V> rf(a, K);
|
||||
Kokkos::Timer timer;
|
||||
|
||||
// initialize to an obviously wrong value
|
||||
double time_no_fence = -1; // launch loop
|
||||
double time_no_fence_fenced = -1; // launch loop then fence
|
||||
double time_fence = -1; // launch&fence loop
|
||||
|
||||
double time_red_no_fence = -1;
|
||||
double time_red_no_fence_fenced = -1;
|
||||
double time_red_fence = -1;
|
||||
|
||||
double time_red_view_no_fence = -1;
|
||||
double time_red_view_no_fence_fenced = -1;
|
||||
double time_red_view_fence = -1;
|
||||
|
||||
if (opts.par_for) {
|
||||
// warmup
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Kokkos::parallel_for(l_no_fence, N, f);
|
||||
}
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_for(l_no_fence, N, f);
|
||||
}
|
||||
time_no_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
time_no_fence_fenced = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_for(l_fence, N, f);
|
||||
Kokkos::fence();
|
||||
}
|
||||
time_fence = timer.seconds();
|
||||
}
|
||||
|
||||
if (opts.par_reduce) {
|
||||
// warmup
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
|
||||
}
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
|
||||
}
|
||||
time_red_no_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
time_red_no_fence_fenced = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_fence, N, rf, result);
|
||||
Kokkos::fence();
|
||||
}
|
||||
time_red_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
if (opts.par_reduce_view) {
|
||||
// warmup
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
|
||||
}
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
|
||||
}
|
||||
time_red_view_no_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
time_red_view_no_fence_fenced = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_view_fence, N, rf, v_result);
|
||||
Kokkos::fence();
|
||||
}
|
||||
time_red_view_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
timer.reset();
|
||||
}
|
||||
|
||||
const double x = 1.e6 / M;
|
||||
printf("%i %i %i %i", N, V, K, M);
|
||||
if (opts.par_for) {
|
||||
printf(" parallel_for: %lf %lf ( %lf )", x * time_no_fence, x * time_fence,
|
||||
x * time_no_fence_fenced);
|
||||
}
|
||||
if (opts.par_reduce) {
|
||||
printf(" parallel_reduce: %lf %lf ( %lf )", x * time_red_no_fence,
|
||||
x * time_red_fence, x * time_red_no_fence_fenced);
|
||||
}
|
||||
if (opts.par_reduce_view) {
|
||||
printf(" parallel_reduce(view): %lf %lf ( %lf )",
|
||||
x * time_red_view_no_fence, x * time_red_view_fence,
|
||||
x * time_red_view_no_fence_fenced);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize(argc, argv);
|
||||
{
|
||||
int N = 10000;
|
||||
int M = 20;
|
||||
int K = 1;
|
||||
|
||||
Opts opts;
|
||||
|
||||
printf("==========================\n");
|
||||
printf("Kokkos Launch Latency Test\n");
|
||||
printf("==========================\n");
|
||||
printf("\n");
|
||||
printf("Usage: %s ARGUMENTS [OPTIONS...]\n\n", argv[0]);
|
||||
printf("Arguments: N M K\n");
|
||||
printf(" N: loop length\n");
|
||||
printf(" M: how many kernels to dispatch\n");
|
||||
printf(
|
||||
" K: nested loop length (capped by size of functor member array\n\n");
|
||||
printf("Options:\n");
|
||||
printf(" --no-parallel-for: skip parallel_for benchmark\n");
|
||||
printf(" --no-parallel-reduce: skip parallel_reduce benchmark\n");
|
||||
printf(
|
||||
" --no-parallel-reduce-view: skip parallel_reduce into view "
|
||||
"benchmark\n");
|
||||
printf("\n\n");
|
||||
printf(" Output V is the size of the functor member array\n");
|
||||
printf("\n\n");
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
const std::string_view arg(argv[i]);
|
||||
|
||||
// anything that doesn't start with --
|
||||
if (arg.size() < 2 ||
|
||||
(arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) {
|
||||
if (i == 1)
|
||||
N = atoi(arg.data());
|
||||
else if (i == 2)
|
||||
M = atoi(arg.data());
|
||||
else if (i == 3)
|
||||
K = atoi(arg.data());
|
||||
else {
|
||||
throw std::runtime_error("unexpected argument!");
|
||||
}
|
||||
} else if (arg == "--no-parallel-for") {
|
||||
opts.par_for = false;
|
||||
} else if (arg == "--no-parallel-reduce") {
|
||||
opts.par_reduce = false;
|
||||
} else if (arg == "--no-parallel-reduce-view") {
|
||||
opts.par_reduce_view = false;
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << "unexpected argument \"" << arg << "\" at position " << i;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
printf("N V K M time_no_fence time_fence (time_no_fence_fenced)\n");
|
||||
|
||||
/* A backend may have different launch strategies for functors of different
|
||||
* sizes: test a variety of functor sizes.*/
|
||||
run<1>(N, M, K <= 1 ? K : 1, opts);
|
||||
run<16>(N, M, K <= 16 ? K : 16, opts);
|
||||
run<200>(N, M, K <= 200 ? K : 200, opts);
|
||||
run<3000>(N, M, K <= 3000 ? K : 3000, opts);
|
||||
run<30000>(N, M, K <= 30000 ? K : 30000, opts);
|
||||
}
|
||||
Kokkos::finalize();
|
||||
}
|
||||
4
lib/kokkos/benchmarks/policy_performance/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/policy_performance/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
policy_performance
|
||||
SOURCES main.cpp
|
||||
)
|
||||
@ -106,8 +106,9 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
Kokkos::parallel_reduce(
|
||||
"parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1),
|
||||
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team,
|
||||
double& lval) { lval += 1; },
|
||||
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type&, double& lval) {
|
||||
lval += 1;
|
||||
},
|
||||
result);
|
||||
|
||||
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;
|
||||
|
||||
@ -21,13 +21,13 @@ struct ParallelScanFunctor {
|
||||
using value_type = double;
|
||||
ViewType v;
|
||||
|
||||
ParallelScanFunctor(const ViewType& v_) : v(v_) {}
|
||||
explicit ParallelScanFunctor(const ViewType& v_) : v(v_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int idx, value_type& val, const bool& final) const {
|
||||
void operator()(const int idx, value_type& val, const bool& is_final) const {
|
||||
// inclusive scan
|
||||
val += v(idx);
|
||||
if (final) {
|
||||
if (is_final) {
|
||||
v(idx) = val;
|
||||
}
|
||||
}
|
||||
@ -109,7 +109,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += 1; },
|
||||
[&](const int, double& vval) { vval += 1; },
|
||||
vector_result);
|
||||
}
|
||||
v2(idx, t) = vector_result;
|
||||
@ -128,7 +128,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
team_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& lval) { lval += 1; }, team_result);
|
||||
[&](const int, double& lval) { lval += 1; }, team_result);
|
||||
}
|
||||
v1(idx) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
@ -170,13 +170,13 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& lval) {
|
||||
[&](const int, double& lval) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += 1; },
|
||||
[&](const int, double& vval) { vval += 1; },
|
||||
vector_result);
|
||||
lval += vector_result;
|
||||
}
|
||||
|
||||
4
lib/kokkos/benchmarks/stream/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/stream/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
stream
|
||||
SOURCES stream-kokkos.cpp
|
||||
)
|
||||
@ -229,7 +229,7 @@ do
|
||||
fi
|
||||
;;
|
||||
#Handle known nvcc args
|
||||
--dryrun|--verbose|--keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-G|-lineinfo|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
|
||||
--dryrun|-dryrun|--verbose|--keep|-keep|--source-in-ptx|-src-in-ptx|--keep-dir*|-keep-dir*|-G|-lineinfo|--generate-line-info|-extended-lambda|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-res-usage|-fmad=*|--use_fast_math|-use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle more known nvcc args
|
||||
|
||||
@ -39,10 +39,12 @@ IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS)
|
||||
GLOBAL
|
||||
CHECK_CUDA_COMPILES)
|
||||
|
||||
ELSEIF(@Kokkos_ENABLE_CUDA@ AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
|
||||
ELSEIF(@Kokkos_ENABLE_CUDA@
|
||||
AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA
|
||||
AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
|
||||
#
|
||||
# if CUDA was enabled, separable compilation was not specified, and current compiler
|
||||
# cannot compile CUDA, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and
|
||||
# if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not
|
||||
# specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and
|
||||
# kokkos_launch_compiler will re-direct to the compiler used to compile CUDA code during installation.
|
||||
# kokkos_launch_compiler will re-direct if ${CMAKE_CXX_COMPILER} and -DKOKKOS_DEPENDENCE is present,
|
||||
# otherwise, the original command will be executed
|
||||
|
||||
@ -23,8 +23,6 @@
|
||||
#cmakedefine KOKKOS_ENABLE_CUDA
|
||||
#cmakedefine KOKKOS_ENABLE_HIP
|
||||
#cmakedefine KOKKOS_ENABLE_HPX
|
||||
#cmakedefine KOKKOS_ENABLE_MEMKIND
|
||||
#cmakedefine KOKKOS_ENABLE_LIBRT
|
||||
#cmakedefine KOKKOS_ENABLE_SYCL
|
||||
#cmakedefine KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED
|
||||
|
||||
@ -32,6 +30,7 @@
|
||||
#cmakedefine KOKKOS_ENABLE_CXX17
|
||||
#cmakedefine KOKKOS_ENABLE_CXX20
|
||||
#cmakedefine KOKKOS_ENABLE_CXX23
|
||||
#cmakedefine KOKKOS_ENABLE_CXX26
|
||||
|
||||
#cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
#cmakedefine KOKKOS_ENABLE_CUDA_UVM
|
||||
@ -45,7 +44,6 @@
|
||||
#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
|
||||
#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
|
||||
#cmakedefine KOKKOS_ENABLE_TUNING
|
||||
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_3
|
||||
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
#cmakedefine KOKKOS_ENABLE_DEPRECATION_WARNINGS
|
||||
#cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS
|
||||
@ -53,17 +51,15 @@
|
||||
#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated
|
||||
#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN
|
||||
#cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS
|
||||
|
||||
/* TPL Settings */
|
||||
#cmakedefine KOKKOS_ENABLE_HWLOC
|
||||
#cmakedefine KOKKOS_USE_LIBRT
|
||||
#cmakedefine KOKKOS_ENABLE_HBWSPACE
|
||||
#cmakedefine KOKKOS_ENABLE_LIBDL
|
||||
#cmakedefine KOKKOS_ENABLE_LIBQUADMATH
|
||||
#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
#cmakedefine KOKKOS_ENABLE_ONEDPL
|
||||
#cmakedefine KOKKOS_ENABLE_ROCTHRUST
|
||||
|
||||
#cmakedefine KOKKOS_ARCH_SSE42
|
||||
#cmakedefine KOKKOS_ARCH_ARMV80
|
||||
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
|
||||
#cmakedefine KOKKOS_ARCH_ARMV81
|
||||
@ -78,6 +74,7 @@
|
||||
#cmakedefine KOKKOS_ARCH_POWER7
|
||||
#cmakedefine KOKKOS_ARCH_POWER8
|
||||
#cmakedefine KOKKOS_ARCH_POWER9
|
||||
#cmakedefine KOKKOS_ARCH_RISCV_SG2042
|
||||
#cmakedefine KOKKOS_ARCH_INTEL_GEN
|
||||
#cmakedefine KOKKOS_ARCH_INTEL_DG1
|
||||
#cmakedefine KOKKOS_ARCH_INTEL_GEN9
|
||||
|
||||
@ -7,7 +7,8 @@ IF (NOT CUDAToolkit_ROOT)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
|
||||
# FIXME CMake 3.28.4 creates more targets than we export
|
||||
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0" AND CMAKE_VERSION VERSION_LESS "3.28.4")
|
||||
find_package(CUDAToolkit)
|
||||
ELSE()
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
|
||||
|
||||
@ -1 +0,0 @@
|
||||
KOKKOS_FIND_IMPORTED(LIBRT HEADER time.h LIBRARY rt)
|
||||
@ -1 +0,0 @@
|
||||
KOKKOS_FIND_IMPORTED(MEMKIND HEADER memkind.h LIBRARY memkind)
|
||||
@ -43,4 +43,7 @@ ELSE()
|
||||
COMPILE_DEFINITIONS PSTL_USE_PARALLEL_POLICIES=0 _GLIBCXX_USE_TBB_PAR_BACKEND=0
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
# Export oneDPL as a Kokkos dependency
|
||||
KOKKOS_EXPORT_CMAKE_TPL(oneDPL)
|
||||
ENDIF()
|
||||
|
||||
15
lib/kokkos/cmake/Modules/FindTPLROCTHRUST.cmake
Normal file
15
lib/kokkos/cmake/Modules/FindTPLROCTHRUST.cmake
Normal file
@ -0,0 +1,15 @@
|
||||
# ROCm 5.6 and earlier set AMDGPU_TARGETS and GPU_TARGETS to all the supported
|
||||
# architectures. Therefore, we end up compiling Kokkos for all the supported
|
||||
# architectures. Starting with ROCm 5.7, AMDGPU_TARGETS and GPU_TARGETS are empty.
|
||||
# It is the user's job to set the variables. Since we are injecting the
|
||||
# architecture flag ourselves, we can leave the variables empty. To replicate the
|
||||
# behavior of ROCm 5.7 and later for earlier versions of ROCm, we set
|
||||
# AMDGPU_TARGETS and GPU_TARGETS to empty and set the values in the cache. If
|
||||
# the values are not cached, FIND_PACKAGE(rocthrust) will overwrite them.
|
||||
SET(AMDGPU_TARGETS "" CACHE STRING "AMD GPU targets to compile for")
|
||||
SET(GPU_TARGETS "" CACHE STRING "GPU targets to compile for")
|
||||
FIND_PACKAGE(rocthrust REQUIRED)
|
||||
KOKKOS_CREATE_IMPORTED_TPL(ROCTHRUST INTERFACE LINK_LIBRARIES roc::rocthrust)
|
||||
|
||||
# Export ROCTHRUST as a Kokkos dependency
|
||||
KOKKOS_EXPORT_CMAKE_TPL(rocthrust)
|
||||
@ -49,7 +49,6 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(WSM "Intel Westmere CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs")
|
||||
@ -60,13 +59,12 @@ DECLARE_AND_CHECK_HOST_ARCH(SKX "Intel Skylake Xeon Server CPUs (A
|
||||
DECLARE_AND_CHECK_HOST_ARCH(KNC "Intel Knights Corner Xeon Phi")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(KNL "Intel Knights Landing Xeon Phi")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(SPR "Intel Sapphire Rapids Xeon Server CPUs (AVX512)")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(BGQ "IBM Blue Gene Q")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(POWER7 "IBM POWER7 CPUs")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(POWER8 "IBM POWER8 CPUs")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(POWER9 "IBM POWER9 CPUs")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ZEN "AMD Zen architecture")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ZEN2 "AMD Zen2 architecture")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ZEN3 "AMD Zen3 architecture")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(RISCV_SG2042 "SG2042 (RISC-V) CPUs")
|
||||
|
||||
IF(Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR Kokkos_ENABLE_SYCL)
|
||||
SET(KOKKOS_SHOW_CUDA_ARCHS ON)
|
||||
@ -191,9 +189,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
ELSEIF(CUDAToolkit_BIN_DIR)
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..)
|
||||
ENDIF()
|
||||
IF (KOKKOS_ENABLE_CUDA)
|
||||
SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE)
|
||||
ENDIF()
|
||||
ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
||||
SET(CUDA_ARCH_FLAG "-gpu")
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda)
|
||||
@ -342,18 +337,6 @@ IF (KOKKOS_ARCH_ZEN3)
|
||||
SET(KOKKOS_ARCH_AVX2 ON)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_WSM)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
|
||||
Cray NO-VALUE-SPECIFIED
|
||||
Intel -xSSE4.2
|
||||
MSVC NO-VALUE-SPECIFIED
|
||||
NVHPC -tp=px
|
||||
DEFAULT -msse4.2
|
||||
)
|
||||
SET(KOKKOS_ARCH_SSE42 ON)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
|
||||
SET(KOKKOS_ARCH_AVX ON)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
@ -378,6 +361,23 @@ IF (KOKKOS_ARCH_HSW)
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
# Host flags for the SG2042 (RISC-V) architecture.
# A configuration error is reported unless the compiler is GCC >= 12 or
# Clang >= 14; the -march flag is added for the host compiler either way
# (SEND_ERROR lets configuration continue but skips generation).
IF (KOKKOS_ARCH_RISCV_SG2042)
  # The whole disjunction has to be negated. Without the outer parentheses NOT
  # applies only to the GNU test, which makes the condition true for every
  # non-GNU compiler and therefore rejects Clang >= 14 as well.
  IF(NOT (
       (KOKKOS_CXX_COMPILER_ID STREQUAL GNU
         AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
       OR
       (KOKKOS_CXX_COMPILER_ID STREQUAL Clang
         AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 14)
     ))
    MESSAGE(SEND_ERROR "Only gcc >= 12 and clang >= 14 support RISC-V.")
  ENDIF()
  COMPILER_SPECIFIC_FLAGS(
    COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
    DEFAULT -march=rv64imafdcv
  )
ENDIF()
|
||||
|
||||
|
||||
IF (KOKKOS_ARCH_BDW)
|
||||
SET(KOKKOS_ARCH_AVX2 ON)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
@ -571,6 +571,11 @@ IF (KOKKOS_ENABLE_HIP)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
DEFAULT -fgpu-rdc
|
||||
)
|
||||
IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
DEFAULT --hip-link
|
||||
)
|
||||
ENDIF()
|
||||
ELSE()
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
DEFAULT -fno-gpu-rdc
|
||||
@ -588,32 +593,44 @@ IF (KOKKOS_ENABLE_SYCL)
|
||||
ENDIF()
|
||||
|
||||
# Check support for device_global variables
|
||||
# FIXME_SYCL Once the feature test macro SYCL_EXT_ONEAPI_DEVICE_GLOBAL is
|
||||
# available, use that instead.
|
||||
IF(KOKKOS_ENABLE_SYCL AND NOT BUILD_SHARED_LIBS)
|
||||
INCLUDE(CheckCXXSourceCompiles)
|
||||
# FIXME_SYCL If SYCL_EXT_ONEAPI_DEVICE_GLOBAL is defined, we can use device
|
||||
# global variables with shared libraries using the "non-separable compilation"
|
||||
# implementation. Otherwise, the feature is not supported when building shared
|
||||
# libraries. Thus, we don't even check for support if shared libraries are
|
||||
# requested and SYCL_EXT_ONEAPI_DEVICE_GLOBAL is not defined.
|
||||
IF(KOKKOS_ENABLE_SYCL)
|
||||
STRING(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${KOKKOS_COMPILE_OPTIONS}")
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
#include <sycl/sycl.hpp>
|
||||
using namespace sycl::ext::oneapi::experimental;
|
||||
using namespace sycl;
|
||||
INCLUDE(CheckCXXSymbolExists)
|
||||
CHECK_CXX_SYMBOL_EXISTS(SYCL_EXT_ONEAPI_DEVICE_GLOBAL "sycl/sycl.hpp" KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_DEVICE_GLOBAL)
|
||||
IF (KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_DEVICE_GLOBAL)
|
||||
SET(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED ON)
|
||||
# Use the non-separable compilation implementation to support shared libraries as well.
|
||||
COMPILER_SPECIFIC_FLAGS(DEFAULT -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED)
|
||||
ELSEIF(NOT BUILD_SHARED_LIBS)
|
||||
INCLUDE(CheckCXXSourceCompiles)
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
#include <sycl/sycl.hpp>
|
||||
using namespace sycl::ext::oneapi::experimental;
|
||||
using namespace sycl;
|
||||
|
||||
SYCL_EXTERNAL device_global<int, decltype(properties(device_image_scope))> Foo;
|
||||
SYCL_EXTERNAL device_global<int, decltype(properties(device_image_scope))> Foo;
|
||||
|
||||
void bar(queue q) {
|
||||
q.single_task([=] {
|
||||
Foo = 42;
|
||||
});
|
||||
}
|
||||
void bar(queue q) {
|
||||
q.single_task([=] {
|
||||
Foo = 42;
|
||||
});
|
||||
}
|
||||
|
||||
int main(){ return 0; }
|
||||
"
|
||||
KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
|
||||
int main(){ return 0; }
|
||||
"
|
||||
KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
|
||||
|
||||
IF(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
DEFAULT -fsycl-device-code-split=off -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED
|
||||
)
|
||||
IF(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
|
||||
# Only the separable compilation implementation is supported.
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
DEFAULT -fsycl-device-code-split=off -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
@ -767,30 +784,35 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_GEN9)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen9" -D__STRICT_ANSI__
|
||||
ELSE()
|
||||
COMPILER_SPECIFIC_OPTIONS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -D__STRICT_ANSI__
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_GEN11)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen11" -D__STRICT_ANSI__
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_GEN12LP)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen12lp" -D__STRICT_ANSI__
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_DG1)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device dg1" -D__STRICT_ANSI__
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_XEHP)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.50.4" -D__STRICT_ANSI__
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_PVC)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.60.7" -D__STRICT_ANSI__
|
||||
IF(KOKKOS_ARCH_INTEL_GEN9)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen9"
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_GEN11)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen11"
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_GEN12LP)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device gen12lp"
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_DG1)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device dg1"
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_XEHP)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.50.4"
|
||||
)
|
||||
ELSEIF(KOKKOS_ARCH_INTEL_PVC)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
IntelLLVM -fopenmp-targets=spir64_gen -Xopenmp-target-backend "-device 12.60.7"
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
@ -1130,3 +1152,14 @@ MESSAGE(STATUS "Architectures:")
|
||||
FOREACH(Arch ${KOKKOS_ENABLED_ARCH_LIST})
|
||||
MESSAGE(STATUS " ${Arch}")
|
||||
ENDFOREACH()
|
||||
|
||||
|
||||
# Consistency checks for the option that bypasses atomics.
IF(KOKKOS_ENABLE_ATOMICS_BYPASS)
  # Abort when either _HOST_PARALLEL or _DEVICE_PARALLEL holds something other
  # than "NoTypeDefined" (presumably the placeholder for "no such backend
  # selected" -- confirm where these variables are set).
  IF(NOT _HOST_PARALLEL STREQUAL "NoTypeDefined" OR NOT _DEVICE_PARALLEL STREQUAL "NoTypeDefined")
    MESSAGE(FATAL_ERROR "Not allowed to disable atomics (via -DKokkos_ENABLE_ATOMICS_BYPASS=ON) if a host parallel or a device backend is enabled!")
  ENDIF()
  # With no parallel backend left, Serial is expected to be enabled.
  IF(NOT KOKKOS_ENABLE_SERIAL)
    MESSAGE(FATAL_ERROR "Implementation bug")  # safeguard
  ENDIF()
  MESSAGE(STATUS "Atomics: **DISABLED**")
ENDIF()
|
||||
|
||||
@ -152,6 +152,7 @@ ENDIF()
|
||||
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) 8.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) 10.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) 15.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 8.2.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 19.0.5 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) 2021.1.1 or higher")
|
||||
@ -210,6 +211,10 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang)
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED KOKKOS_CXX_HOST_COMPILER_ID)
|
||||
|
||||
@ -48,7 +48,6 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
|
||||
# resolved but we keep the option around a bit longer to be safe.
|
||||
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_3 OFF "Whether code deprecated in major release 3 is available" )
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
|
||||
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
|
||||
@ -74,6 +73,7 @@ KOKKOS_ENABLE_OPTION(HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF "Whether multiple ke
|
||||
|
||||
# This option will go away eventually, but allows fallback to old implementation when needed.
|
||||
KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation")
|
||||
KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases")
|
||||
|
||||
KOKKOS_ENABLE_OPTION(IMPL_MDSPAN OFF "Whether to enable experimental mdspan support")
|
||||
KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan")
|
||||
|
||||
@ -7,6 +7,7 @@ KOKKOS_OPTION(CXX_STANDARD "" STRING "[[DEPRECATED - USE CMAKE_CXX_STANDARD INST
|
||||
SET(KOKKOS_ENABLE_CXX17 OFF)
|
||||
SET(KOKKOS_ENABLE_CXX20 OFF)
|
||||
SET(KOKKOS_ENABLE_CXX23 OFF)
|
||||
SET(KOKKOS_ENABLE_CXX26 OFF)
|
||||
IF (KOKKOS_CXX_STANDARD)
|
||||
MESSAGE(FATAL_ERROR "Setting the variable Kokkos_CXX_STANDARD in configuration is deprecated - set CMAKE_CXX_STANDARD directly instead")
|
||||
ENDIF()
|
||||
|
||||
@ -74,6 +74,10 @@ ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "23")
|
||||
kokkos_set_cxx_standard_feature(23)
|
||||
SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "2B")
|
||||
SET(KOKKOS_ENABLE_CXX23 ON)
|
||||
ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "26")
|
||||
kokkos_set_cxx_standard_feature(26)
|
||||
SET(KOKKOS_CXX_INTERMEDIATE_STANDARD "2C")
|
||||
SET(KOKKOS_ENABLE_CXX26 ON)
|
||||
ELSE()
|
||||
MESSAGE(FATAL_ERROR "Kokkos requires C++17 or newer but requested ${KOKKOS_CXX_STANDARD}!")
|
||||
ENDIF()
|
||||
|
||||
@ -32,19 +32,21 @@ FUNCTION(KOKKOS_TPL_OPTION PKG DEFAULT)
|
||||
ENDFUNCTION()
|
||||
|
||||
KOKKOS_TPL_OPTION(HWLOC Off TRIBITS HWLOC)
|
||||
KOKKOS_TPL_OPTION(MEMKIND Off)
|
||||
IF(KOKKOS_ENABLE_MEMKIND)
|
||||
SET(KOKKOS_ENABLE_HBWSPACE ON)
|
||||
ENDIF()
|
||||
KOKKOS_TPL_OPTION(CUDA ${Kokkos_ENABLE_CUDA} TRIBITS CUDA)
|
||||
KOKKOS_TPL_OPTION(LIBRT Off)
|
||||
IF(KOKKOS_ENABLE_HIP AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC AND NOT
|
||||
KOKKOS_HAS_TRILINOS)
|
||||
SET(ROCM_DEFAULT ON)
|
||||
ELSE()
|
||||
SET(ROCM_DEFAULT OFF)
|
||||
ENDIF()
|
||||
IF(KOKKOS_ENABLE_HIP AND NOT KOKKOS_HAS_TRILINOS)
|
||||
SET(ROCTHRUST_DEFAULT ON)
|
||||
ELSE()
|
||||
SET(ROCTHRUST_DEFAULT OFF)
|
||||
ENDIF()
|
||||
KOKKOS_TPL_OPTION(ROCM ${ROCM_DEFAULT})
|
||||
KOKKOS_TPL_OPTION(ROCTHRUST ${ROCTHRUST_DEFAULT})
|
||||
|
||||
IF(KOKKOS_ENABLE_SYCL AND NOT KOKKOS_HAS_TRILINOS)
|
||||
SET(ONEDPL_DEFAULT ON)
|
||||
ELSE()
|
||||
@ -77,21 +79,18 @@ KOKKOS_TPL_OPTION(LIBQUADMATH ${LIBQUADMATH_DEFAULT} TRIBITS quadmath)
|
||||
|
||||
#Make sure we use our local FindKokkosCuda.cmake
|
||||
KOKKOS_IMPORT_TPL(HPX INTERFACE)
|
||||
IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
|
||||
KOKKOS_IMPORT_TPL(CUDA INTERFACE)
|
||||
ENDIF()
|
||||
KOKKOS_IMPORT_TPL(CUDA INTERFACE)
|
||||
KOKKOS_IMPORT_TPL(HWLOC)
|
||||
KOKKOS_IMPORT_TPL(LIBRT)
|
||||
KOKKOS_IMPORT_TPL(LIBDL)
|
||||
KOKKOS_IMPORT_TPL(MEMKIND)
|
||||
IF (NOT WIN32)
|
||||
KOKKOS_IMPORT_TPL(THREADS INTERFACE)
|
||||
ENDIF()
|
||||
IF (NOT KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
|
||||
KOKKOS_IMPORT_TPL(ROCM INTERFACE)
|
||||
KOKKOS_IMPORT_TPL(ONEDPL INTERFACE)
|
||||
ENDIF()
|
||||
KOKKOS_IMPORT_TPL(ONEDPL INTERFACE)
|
||||
KOKKOS_IMPORT_TPL(LIBQUADMATH)
|
||||
KOKKOS_IMPORT_TPL(ROCTHRUST)
|
||||
|
||||
IF (Kokkos_ENABLE_DESUL_ATOMICS_EXTERNAL)
|
||||
find_package(desul REQUIRED COMPONENTS atomics)
|
||||
@ -119,7 +118,3 @@ STRING(REPLACE ";" "\n" KOKKOS_TPL_EXPORT_TEMP "${KOKKOS_TPL_EXPORTS}")
|
||||
#Convert to a regular variable
|
||||
UNSET(KOKKOS_TPL_EXPORTS CACHE)
|
||||
SET(KOKKOS_TPL_EXPORTS ${KOKKOS_TPL_EXPORT_TEMP})
|
||||
IF (KOKKOS_ENABLE_MEMKIND)
|
||||
SET(KOKKOS_ENABLE_HBWSPACE)
|
||||
LIST(APPEND KOKKOS_MEMSPACE_LIST HBWSpace)
|
||||
ENDIF()
|
||||
|
||||
@ -237,18 +237,10 @@ ENDMACRO()
|
||||
## KOKKOS_DECLARE is the declaration set
|
||||
## KOKKOS_POST_INCLUDE is included at the end of Kokkos_Core.hpp
|
||||
MACRO(KOKKOS_CONFIGURE_CORE)
|
||||
SET(FWD_BACKEND_LIST)
|
||||
FOREACH(MEMSPACE ${KOKKOS_MEMSPACE_LIST})
|
||||
LIST(APPEND FWD_BACKEND_LIST ${MEMSPACE})
|
||||
ENDFOREACH()
|
||||
FOREACH(BACKEND_ ${KOKKOS_ENABLED_DEVICES})
|
||||
LIST(APPEND FWD_BACKEND_LIST ${BACKEND_})
|
||||
ENDFOREACH()
|
||||
MESSAGE(STATUS "Kokkos Devices: ${KOKKOS_ENABLED_DEVICES}, Kokkos Backends: ${FWD_BACKEND_LIST}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${FWD_BACKEND_LIST}")
|
||||
MESSAGE(STATUS "Kokkos Backends: ${KOKKOS_ENABLED_DEVICES}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${FWD_BACKEND_LIST}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_PostInclude.hpp "KOKKOS_POST_INCLUDE" "Kokkos_Post_Include" "${KOKKOS_BACKEND_POST_INCLUDE_LIST}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}")
|
||||
SET(_DEFAULT_HOST_MEMSPACE "::Kokkos::HostSpace")
|
||||
KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space")
|
||||
KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space")
|
||||
@ -309,7 +301,6 @@ MACRO(KOKKOS_INSTALL_ADDITIONAL_FILES)
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_FwdBackend.hpp"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_SetupBackend.hpp"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_DeclareBackend.hpp"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_Config_PostInclude.hpp"
|
||||
DESTINATION ${KOKKOS_HEADER_DIR})
|
||||
ENDMACRO()
|
||||
|
||||
|
||||
@ -1,773 +0,0 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
#
|
||||
# Global config
|
||||
#
|
||||
|
||||
set -o pipefail
|
||||
|
||||
# Determine current machine.
|
||||
|
||||
MACHINE=""
|
||||
HOSTNAME=$(hostname)
|
||||
PROCESSOR=`uname -p`
|
||||
|
||||
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
|
||||
MACHINE=white
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
MACHINE=bowman
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" == n* ]]; then # Warning: very generic name
|
||||
if [[ "$PROCESSOR" = "aarch64" ]]; then
|
||||
MACHINE=sullivan
|
||||
module load git
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" == node* ]]; then # Warning: very generic name
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
MACHINE=shepard
|
||||
module load git
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" == apollo\.* ]]; then
|
||||
MACHINE=apollo
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" == sullivan ]]; then
|
||||
MACHINE=sullivan
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" == mayer\.* ]]; then
|
||||
MACHINE=mayer
|
||||
# module load git
|
||||
fi
|
||||
if [[ "$HOSTNAME" == cn* ]]; then # Warning: very generic name
|
||||
MACHINE=mayer
|
||||
fi
|
||||
|
||||
if [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
MACHINE=sems
|
||||
module load sems-git
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
echo "Unrecognized machine" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Running on machine: $MACHINE"
|
||||
|
||||
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
|
||||
CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
|
||||
|
||||
GCC_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
|
||||
IBM_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
CLANG_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
INTEL_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
#CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
PGI_WARNING_FLAGS=""
|
||||
|
||||
# Default. Machine specific can override.
|
||||
DEBUG=False
|
||||
ARGS=""
|
||||
CUSTOM_BUILD_LIST=""
|
||||
DRYRUN=False
|
||||
BUILD_ONLY=False
|
||||
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
TEST_SCRIPT=False
|
||||
SKIP_HWLOC=False
|
||||
SPOT_CHECK=False
|
||||
|
||||
PRINT_HELP=False
|
||||
OPT_FLAG=""
|
||||
CXX_FLAGS_EXTRA=""
|
||||
LD_FLAGS_EXTRA=""
|
||||
KOKKOS_OPTIONS=""
|
||||
|
||||
#
|
||||
# Handle arguments.
|
||||
#
|
||||
|
||||
while [[ $# > 0 ]]
|
||||
do
|
||||
key="$1"
|
||||
|
||||
case $key in
|
||||
--kokkos-path*)
|
||||
KOKKOS_PATH="${key#*=}"
|
||||
;;
|
||||
--build-list*)
|
||||
CUSTOM_BUILD_LIST="${key#*=}"
|
||||
;;
|
||||
--debug*)
|
||||
DEBUG=True
|
||||
;;
|
||||
--build-only*)
|
||||
BUILD_ONLY=True
|
||||
;;
|
||||
--test-script*)
|
||||
TEST_SCRIPT=True
|
||||
;;
|
||||
--skip-hwloc*)
|
||||
SKIP_HWLOC=True
|
||||
;;
|
||||
--num*)
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
|
||||
;;
|
||||
--dry-run*)
|
||||
DRYRUN=True
|
||||
;;
|
||||
--spot-check*)
|
||||
SPOT_CHECK=True
|
||||
;;
|
||||
--arch*)
|
||||
ARCH_FLAG="--arch=${key#*=}"
|
||||
;;
|
||||
--opt-flag*)
|
||||
OPT_FLAG="${key#*=}"
|
||||
;;
|
||||
--with-cuda-options*)
|
||||
KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
|
||||
;;
|
||||
--with-options*)
|
||||
KOKKOS_OPTIONS="--with-options=enable_large_mem_tests,${key#*=}"
|
||||
;;
|
||||
--cxxflags-extra*)
|
||||
CXX_FLAGS_EXTRA="${key#*=}"
|
||||
;;
|
||||
--ldflags-extra*)
|
||||
LD_FLAGS_EXTRA="${key#*=}"
|
||||
;;
|
||||
--help*)
|
||||
PRINT_HELP=True
|
||||
;;
|
||||
*)
|
||||
# args, just append
|
||||
ARGS="$ARGS $1"
|
||||
;;
|
||||
esac
|
||||
|
||||
shift
|
||||
done
|
||||
|
||||
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
|
||||
|
||||
# Set kokkos path.
|
||||
if [ -z "$KOKKOS_PATH" ]; then
|
||||
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
|
||||
else
|
||||
# Ensure KOKKOS_PATH is abs path.
|
||||
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
|
||||
fi
|
||||
|
||||
UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null`
|
||||
if ! [ -z "$UNCOMMITTED" ]; then
|
||||
echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :"
|
||||
echo "$UNCOMMITTED"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline`
|
||||
echo "Repository Status: " ${GITSTATUS}
|
||||
echo ""
|
||||
echo ""
|
||||
|
||||
#
|
||||
# Machine specific config.
|
||||
#
|
||||
|
||||
if [ "$MACHINE" = "sems" ]; then
|
||||
source /projects/sems/modulefiles/utils/sems-modules-init.sh
|
||||
|
||||
BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
|
||||
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG=""
|
||||
fi
|
||||
|
||||
if [ "$SPOT_CHECK" = "True" ]; then
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
else
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
fi
|
||||
elif [ "$MACHINE" = "white" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
|
||||
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.4.0,ibm/xl/16.1.0"
|
||||
|
||||
# Don't do pthread on white.
|
||||
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
|
||||
"cuda/9.0.103 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=Power8,Kepler37"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "bowman" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/16.4.258 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.2.174 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/18.0.128 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=KNL"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "sullivan" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=96
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/6.1.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS")
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=ARMv8-ThunderX"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "mayer" ]; then
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=96
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
ARM_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"arm/1.4.0 $ARM_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS")
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=ARMv8-TX2"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "shepard" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
BASE_MODULE_LIST_INTEL="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/17.4.196 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/18.0.128 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=HSW"
|
||||
fi
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "apollo" ]; then
|
||||
source /projects/sems/modulefiles/utils/sems-modules-init.sh
|
||||
module use /home/projects/modulefiles/local/x86-64
|
||||
module load kokkos-env
|
||||
|
||||
module load sems-git
|
||||
module load sems-tex
|
||||
module load sems-cmake/3.5.2
|
||||
module load sems-gdb
|
||||
|
||||
SKIP_HWLOC=True
|
||||
|
||||
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
|
||||
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
|
||||
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
|
||||
|
||||
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.0.69"
|
||||
NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
|
||||
|
||||
BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
|
||||
BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
|
||||
BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
|
||||
|
||||
if [ "$SPOT_CHECK" = "True" ]; then
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS"
|
||||
"cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
else
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
|
||||
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
|
||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
)
|
||||
fi
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=SNB,Volta70"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
else
|
||||
echo "Unhandled machine $MACHINE" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
declare -i NUM_RESULTS_TO_KEEP=7
|
||||
|
||||
RESULT_ROOT_PREFIX=TestAll
|
||||
|
||||
# Print the usage text (including the compilers known for this machine) and
# exit successfully when help was requested.
if [ "$PRINT_HELP" = "True" ]; then
  cat <<'EOF'
test_all_sandia <ARGS> <OPTIONS>:
--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory
 Defaults to root repo containing this script
--debug: Run tests in debug. Defaults to False
--test-script: Test this script, not Kokkos
--skip-hwloc: Do not do hwloc tests
--num=N: Number of jobs to run in parallel
--spot-check: Minimal test set to issue pull request
--dry-run: Just print what would be executed
--build-only: Just do builds, don't run anything
--opt-flag=FLAG: Optimization flag (default: -O3)
--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS
--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS
--arch=ARCHITECTURE: overwrite architecture flags
--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS
--build-list=BUILD,BUILD,BUILD...
 Provide a comma-separated list of builds instead of running all builds
 Valid items:
 OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial
 Cuda_OpenMP, Cuda_Pthread, Cuda_Serial

ARGS: list of expressions matching compilers to test
 supported compilers sems
EOF

  # The first field of every COMPILERS record is the compiler spec.
  for COMPILER_DATA in "${COMPILERS[@]}"; do
    ARR=($COMPILER_DATA)
    COMPILER=${ARR[0]}
    echo " $COMPILER"
  done

  cat <<'EOF'

Examples:
 Run all tests
 % test_all_sandia

 Run all gcc tests
 % test_all_sandia gcc

 Run all gcc/4.8.4 and all intel tests
 % test_all_sandia gcc/4.8.4 intel

 Run all tests in debug
 % test_all_sandia --debug

 Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds
 % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial

If you want to kill the tests, do:
 hit ctrl-z
 % kill -9 %1

EOF
  exit 0
fi
|
||||
|
||||
# Set build type: release unless a debug run was requested.
BUILD_TYPE=release
if [ "$DEBUG" = "True" ]; then
  BUILD_TYPE=debug
fi
|
||||
|
||||
# If no args provided, do all compilers ('?' followed by the '*' appended
# below matches every non-empty compiler spec).
if [ -z "$ARGS" ]; then
  ARGS='?'
fi

# Process args to figure out which compilers to test.
#
# Each ARG is used as a prefix pattern against the first field of every
# COMPILERS record; matching specs are appended (space-separated, in COMPILERS
# order) to COMPILERS_TO_TEST.
# NOTE(review): $ARGS is expanded unquoted, so an ARG containing glob
# characters (including the default '?') is subject to pathname expansion
# against the current directory before it is used as a pattern.
COMPILERS_TO_TEST=""

for ARG in $ARGS; do
  for COMPILER_DATA in "${COMPILERS[@]}"; do
    ARR=($COMPILER_DATA)
    COMPILER=${ARR[0]}

    if [[ "$COMPILER" = $ARG* ]]; then
      # Compare against whole, space-delimited entries. A plain substring
      # test would wrongly treat e.g. "clang/6.0" as already present once
      # "clang/6.0.1" has been added.
      if [[ " $COMPILERS_TO_TEST " != *" ${COMPILER} "* ]]; then
        COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
      else
        echo "Tried to add $COMPILER twice"
      fi
    fi
  done
done
|
||||
|
||||
#
|
||||
# Functions.
|
||||
#
|
||||
|
||||
# get_compiler_name <COMPILER>
#
# Print the name part of a "<name>/<version>" compiler spec, i.e. everything
# before the first '/'. A spec without any '/' is printed unchanged.
get_compiler_name() {
  # Quoted parameter expansion: the argument is neither word-split nor
  # globbed, and no external process is needed.
  printf '%s\n' "${1%%/*}"
}
|
||||
|
||||
# get_compiler_version <COMPILER>
#
# Print the second '/'-separated field of a "<name>/<version>" compiler spec.
# A spec without any '/' is printed unchanged.
get_compiler_version() {
  # Drop everything up to and including the first '/', then anything from the
  # next '/' onwards. Quoted expansions avoid word splitting and globbing.
  local rest=${1#*/}
  printf '%s\n' "${rest%%/*}"
}
|
||||
|
||||
# Do not call directly.
#
# get_compiler_data <COMPILER> <ITEM>
#
# Look up <COMPILER> (e.g. "gcc/4.8.4") in the global COMPILERS array, whose
# entries are whitespace-separated records of the form
#   "compiler module-list build-list exe-name warning-flag"
# and print field <ITEM> (0-based) of the matching record, with commas turned
# into spaces and the <COMPILER_NAME> / <COMPILER_VERSION> placeholders
# replaced by the two halves of <COMPILER>.
#
# If no record matches, an error is written to stderr and the shell running
# this function exits with status 1.
get_compiler_data() {
  local compiler=$1
  local item=$2
  local compiler_name=$(get_compiler_name "$compiler")
  local compiler_vers=$(get_compiler_version "$compiler")

  local compiler_data
  for compiler_data in "${COMPILERS[@]}" ; do
    # Intentional word splitting: a record is a whitespace-separated tuple.
    local arr=($compiler_data)

    if [ "$compiler" = "${arr[0]}" ]; then
      echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g"
      return 0
    fi
  done

  # Not found.
  echo "Unrecognized compiler $compiler" >&2
  exit 1
}
|
||||
|
||||
#
|
||||
# For all getters, usage: <GETTER> <COMPILER>
|
||||
#
|
||||
|
||||
# get_compiler_modules <COMPILER>
#
# Print field 1 (the module list) of the COMPILERS record for <COMPILER>, as
# produced by get_compiler_data.
get_compiler_modules() {
  # Quoted so the spec always arrives as a single argument and the item index
  # stays in second position.
  get_compiler_data "$1" 1
}
|
||||
|
||||
# get_compiler_build_list <COMPILER>
#
# Print field 2 (the build list) of the COMPILERS record for <COMPILER>, as
# produced by get_compiler_data.
get_compiler_build_list() {
  # Quoted so the spec always arrives as a single argument and the item index
  # stays in second position.
  get_compiler_data "$1" 2
}
|
||||
|
||||
# get_compiler_exe_name <COMPILER>
#
# Print field 3 (the compiler executable) of the COMPILERS record for
# <COMPILER>, as produced by get_compiler_data.
get_compiler_exe_name() {
  # Quoted so the spec always arrives as a single argument and the item index
  # stays in second position.
  get_compiler_data "$1" 3
}
|
||||
|
||||
# get_compiler_warning_flags <COMPILER>
#
# Print field 4 (the warning flags) of the COMPILERS record for <COMPILER>, as
# produced by get_compiler_data.
get_compiler_warning_flags() {
  # Quoted so the spec always arrives as a single argument and the item index
  # stays in second position.
  get_compiler_data "$1" 4
}
|
||||
|
||||
# run_cmd <COMMAND...>
#
# Echo the command line, then evaluate it with stderr merged into stdout.
# When DRYRUN is "True" the command is only echoed and the function
# succeeds; otherwise the status of the evaluated command is returned.
run_cmd() {
  echo "RUNNING: $*"

  # Dry run: printing the command is all that is wanted.
  if [ "$DRYRUN" = "True" ]; then
    return 0
  fi

  eval "$* 2>&1"
}
|
||||
|
||||
# report_and_log_test_result <SUCCESS> <DESC> <COMMENT>
#
# Record the outcome of one job.
#   SUCCESS  "0" for a passed job, anything else for a failure.
#   DESC     job description; used as the name of the result file.
#   COMMENT  text stored in the result file. For failures it must be the name
#            of the phase that failed, because the log "<DESC>.<COMMENT>.log"
#            in the current directory is dumped to stdout.
# Results are written to $PASSED_DIR/<DESC> or $FAILED_DIR/<DESC>.
report_and_log_test_result() {
  # Use sane var names.
  local success=$1
  local desc=$2
  local comment=$3

  if [ "$success" = "0" ]; then
    echo " PASSED $desc"
    echo "$comment" > "$PASSED_DIR/$desc"
  else
    # For failures, comment should be the name of the phase that failed.
    echo " FAILED $desc" >&2
    echo "$comment" > "$FAILED_DIR/$desc"
    cat "${desc}.${comment}.log"
  fi
}
|
||||
|
||||
# setup_env <COMPILER>
#
# Purge all loaded environment modules, then load every module listed for
# <COMPILER> (see get_compiler_modules), in order. Returns 1 as soon as a
# module does not show up in `module list` after loading, 0 otherwise.
setup_env() {
  local compiler=$1
  local compiler_modules=$(get_compiler_modules "$compiler")

  module purge

  local mod
  for mod in $compiler_modules; do
    echo "Loading module $mod"
    module load "$mod" 2>&1
    # It is ridiculously hard to check for the success of a loaded
    # module. Module does not return error codes and piping to grep
    # causes module to run in a subshell.
    # -F: match the module name literally; as a regex, the dots in a version
    # such as "4.8.4" would match any character and could accept a different
    # module as loaded.
    module list 2>&1 | grep -F -- "$mod" > /dev/null 2>&1 || return 1
  done

  return 0
}
|
||||
|
||||
# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
#
# Run one job in $ROOT_DIR/<COMPILER>/<BUILD>-<BUILD_TYPE>:
#   1. load the compiler's modules (setup_env),
#   2. configure with generate_makefile.bash,
#   3. build with `make build-test`,
#   4. unless BUILD_ONLY is not "False", run `make test`.
# Each phase logs to <desc>.<phase>.log in the job directory. The first
# failing phase is reported through report_and_log_test_result and ends the
# job; a fully successful job is reported with its build/run times.
# When TEST_SCRIPT is "True" nothing is built: the job sleeps a random 1-10
# seconds and fails artificially when that number is above 5.
# Side effect: OPT_FLAG is set to "-O3" globally if it was empty.
single_build_and_test() {
  # Use sane var names.
  local compiler=$1
  local build=$2
  local build_type=$3

  # Set up env.
  local work_dir="$ROOT_DIR/$compiler/${build}-$build_type"
  mkdir -p "$work_dir"
  cd "$work_dir"
  local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
  # ';' rather than '&&' before 'return 0': the job must stop here even if
  # the reporting helper itself returns non-zero.
  setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure; return 0; }

  # Set up flags.
  local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
  local compiler_exe=$(get_compiler_exe_name $compiler)

  if [[ "$build_type" = hwloc* ]]; then
    # hwloc prefix = two directory levels above the hwloc-info executable.
    local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
  fi

  if [[ "$OPT_FLAG" = "" ]]; then
    OPT_FLAG="-O3"
  fi

  if [[ "$build_type" = *debug* ]]; then
    local extra_args="$extra_args --debug"
    local cxxflags="-g $compiler_warning_flags"
    local ldflags="-g"
  else
    local cxxflags="$OPT_FLAG $compiler_warning_flags"
    local ldflags="${OPT_FLAG}"
  fi

  local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}"
  local ldflags="${ldflags} ${LD_FLAGS_EXTRA}"

  if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
    local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
  fi
  if [[ "$KOKKOS_OPTIONS" != "" ]]; then
    local extra_args="$extra_args $KOKKOS_OPTIONS"
  else
    local extra_args="$extra_args --with-options=enable_large_mem_tests"
  fi

  echo " Starting job $desc"

  local comment="no_comment"

  if [ "$TEST_SCRIPT" = "True" ]; then
    # Script self-test: simulate a job of random length and outcome.
    local rand=$(( 1 + RANDOM % 10 ))
    sleep $rand

    if [ $rand -gt 5 ]; then
      run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure; return 0; }
    fi
  else
    # The escaped quotes survive until run_cmd's eval, keeping the multi-word
    # flag strings together as single option values.
    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure; return 0; }
    local -i build_start_time=$(date +%s)
    run_cmd make -j 48 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build; return 0; }
    local -i build_end_time=$(date +%s)
    comment="build_time=$(($build_end_time-$build_start_time))"

    if [[ "$BUILD_ONLY" == False ]]; then
      run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test; return 0; }
      local -i run_end_time=$(date +%s)
      comment="$comment run_time=$(($run_end_time-$build_end_time))"
    fi
  fi

  report_and_log_test_result 0 $desc "$comment"

  return 0
}
|
||||
|
||||
# wait_for_jobs <NUM-JOBS>
#
# Block, polling once per second, until the number of lines printed by
# `jobs` is below <NUM-JOBS>.
wait_for_jobs() {
  local -i limit=$1
  local -i active=$(jobs | wc -l)

  until [ $active -lt $limit ]; do
    sleep 1
    active=$(jobs | wc -l)
    # Second call in this shell with its output discarded, as in the
    # original; presumably this lets bash drop finished jobs from its
    # table (confirm).
    jobs > /dev/null 2>&1
  done
}
|
||||
|
||||
# run_in_background <COMPILER> <BUILD> <BUILD_TYPE>
#
# Wait until a job slot is free, then start single_build_and_test with the
# given arguments as a background job. The slot limit is
# NUM_JOBS_TO_RUN_IN_PARALLEL, except that cuda and clang compilers are
# always limited to a single job.
run_in_background() {
  local compiler=$1

  local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
  # Compiler specs look like "clang/3.9.0", so the clang test must be a
  # prefix match just like the cuda one; an exact comparison with "clang"
  # can never be true.
  if [[ "$compiler" == cuda* || "$compiler" == clang* ]]; then
    num_jobs=1
  fi
  wait_for_jobs $num_jobs

  # "$@" forwards each argument unchanged.
  single_build_and_test "$@" &
}
|
||||
|
||||
# build_and_test_all <COMPILER>
#
# Launch one background job per build configured for <COMPILER>. The builds
# come from CUSTOM_BUILD_LIST (comma-separated) when it is set, otherwise
# from the compiler's COMPILERS record. For non-cuda compilers an extra
# "hwloc-<BUILD_TYPE>" job is launched per build when SKIP_HWLOC is "False".
build_and_test_all() {
  local compiler=$1

  # Pick the list of builds for this compiler.
  if [ -n "$CUSTOM_BUILD_LIST" ]; then
    local builds=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
  else
    local builds=$(get_compiler_build_list $compiler)
  fi

  # If not cuda, do a hwloc test too (decided once; it is the same for every
  # build of this compiler).
  local with_hwloc=no
  if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then
    with_hwloc=yes
  fi

  # Do builds.
  local build
  for build in $builds; do
    run_in_background $compiler $build $BUILD_TYPE

    if [ "$with_hwloc" = yes ]; then
      run_in_background $compiler $build "hwloc-$BUILD_TYPE"
    fi
  done

  return 0
}
|
||||
|
||||
# get_test_root_dir
#
# Prune old result directories in the current directory and print the path
# for a new one. Entries named "$RESULT_ROOT_PREFIX*" are sorted by name and
# the first ones are removed until at most NUM_RESULTS_TO_KEEP remain. The
# printed path is "<cwd>/<prefix>_<timestamp>"; the directory itself is not
# created here.
get_test_root_dir() {
  local old_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort)
  # One entry per line; an empty list still counts as one line.
  local -i old_count=$(printf '%s\n' $old_results | wc -l)
  local -i surplus=$(( old_count - NUM_RESULTS_TO_KEEP ))

  if [ $surplus -gt 0 ]; then
    # Names sort chronologically because of the timestamp suffix, so the
    # head of the list holds the oldest results.
    /bin/rm -rf $(printf '%s\n' $old_results | head -n $surplus)
  fi

  echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S")
}
|
||||
|
||||
# wait_summarize_and_exit
#
# Wait for all background jobs, print every passed test with its stored
# comment, then every failed test with the phase that failed, and exit.
# The exit status is the number of failed tests.
wait_summarize_and_exit() {
  wait_for_jobs 1

  local banner="#######################################################"

  echo "$banner"
  echo "PASSED TESTS"
  echo "$banner"

  local passed_test
  for passed_test in $(\ls -1 $PASSED_DIR | sort); do
    echo $passed_test $(cat $PASSED_DIR/$passed_test)
  done

  local -i rv=0
  # Only print the failure section when at least one result file exists.
  if [ -n "$(ls -A $FAILED_DIR)" ]; then
    echo "$banner"
    echo "FAILED TESTS"
    echo "$banner"

    local failed_test
    for failed_test in $(\ls -1 $FAILED_DIR | sort); do
      echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
      # rv has the integer attribute, so += is an arithmetic increment.
      rv+=1
    done
  fi

  exit $rv
}
|
||||
|
||||
#
|
||||
# Main.
|
||||
#
|
||||
|
||||
# Create a fresh, timestamped result tree and work from inside it.
ROOT_DIR=$(get_test_root_dir)
mkdir -p "$ROOT_DIR"
# Jobs write their logs relative to this tree; do not continue from the
# wrong directory if it cannot be entered.
cd "$ROOT_DIR" || exit 1

# One file per finished job is written to these directories
# (see report_and_log_test_result).
PASSED_DIR=$ROOT_DIR/results/passed
FAILED_DIR=$ROOT_DIR/results/failed
mkdir -p "$PASSED_DIR"
mkdir -p "$FAILED_DIR"

echo "Going to test compilers: " $COMPILERS_TO_TEST
for COMPILER in $COMPILERS_TO_TEST; do
  echo "Testing compiler $COMPILER"
  build_and_test_all $COMPILER
done

# Waits for the remaining background jobs, prints the summary and exits with
# the number of failed tests.
wait_summarize_and_exit
|
||||
@ -1,4 +0,0 @@
|
||||
packages:
|
||||
kokkos:
|
||||
variants: +cuda +openmp +volta70 +cuda_lambda +wrapper ^cuda@10.1
|
||||
compiler: [gcc@7.2.0]
|
||||
@ -28,24 +28,6 @@
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace Impl {
|
||||
//! Either append to the label if the property already exists, or set it.
|
||||
template <typename... P>
|
||||
auto with_updated_label(const ViewCtorProp<P...>& view_ctor_prop,
|
||||
const std::string& label) {
|
||||
using vcp_t = ViewCtorProp<P...>;
|
||||
//! If the label property is already set, append. Otherwise, set label.
|
||||
if constexpr (vcp_t::has_label) {
|
||||
vcp_t new_ctor_props(view_ctor_prop);
|
||||
static_cast<ViewCtorProp<void, std::string>&>(new_ctor_props)
|
||||
.value.append(label);
|
||||
return new_ctor_props;
|
||||
} else {
|
||||
return Impl::with_properties_if_unset(view_ctor_prop, label);
|
||||
}
|
||||
}
|
||||
} // namespace Impl
|
||||
|
||||
template <typename Device = Kokkos::DefaultExecutionSpace>
|
||||
class Bitset;
|
||||
|
||||
@ -92,9 +74,10 @@ class Bitset {
|
||||
using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
|
||||
|
||||
public:
|
||||
/// constructor
|
||||
Bitset() = default;
|
||||
|
||||
/// arg_size := number of bit in set
|
||||
Bitset(unsigned arg_size = 0u) : Bitset(Kokkos::view_alloc(), arg_size) {}
|
||||
Bitset(unsigned arg_size) : Bitset(Kokkos::view_alloc(), arg_size) {}
|
||||
|
||||
template <class... P>
|
||||
Bitset(const Impl::ViewCtorProp<P...>& arg_prop, unsigned arg_size)
|
||||
@ -108,9 +91,8 @@ class Bitset {
|
||||
"Allocation properties should not contain the 'pointer' property.");
|
||||
|
||||
//! Update 'label' property and allocate.
|
||||
const auto prop_copy = Kokkos::Impl::with_updated_label(
|
||||
Impl::with_properties_if_unset(arg_prop, std::string("Bitset")),
|
||||
" - blocks");
|
||||
const auto prop_copy =
|
||||
Impl::with_properties_if_unset(arg_prop, std::string("Bitset"));
|
||||
m_blocks =
|
||||
block_view_type(prop_copy, ((m_size + block_mask) >> block_shift));
|
||||
|
||||
@ -310,8 +292,8 @@ class Bitset {
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned m_size;
|
||||
unsigned m_last_block_mask;
|
||||
unsigned m_size = 0;
|
||||
unsigned m_last_block_mask = 0;
|
||||
block_view_type m_blocks;
|
||||
|
||||
private:
|
||||
|
||||
@ -292,15 +292,6 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
||||
d_view(src.d_view),
|
||||
h_view(src.h_view) {}
|
||||
|
||||
//! Copy assignment operator (shallow copy assignment)
|
||||
template <typename DT, typename... DP>
|
||||
DualView& operator=(const DualView<DT, DP...>& src) {
|
||||
modified_flags = src.modified_flags;
|
||||
d_view = src.d_view;
|
||||
h_view = src.h_view;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//! Subview constructor
|
||||
template <class DT, class... DP, class Arg0, class... Args>
|
||||
DualView(const DualView<DT, DP...>& src, const Arg0& arg0, Args... args)
|
||||
|
||||
@ -1340,7 +1340,7 @@ class ViewMapping<
|
||||
|
||||
template <class MemoryTraits>
|
||||
struct apply {
|
||||
static_assert(Kokkos::is_memory_traits<MemoryTraits>::value, "");
|
||||
static_assert(Kokkos::is_memory_traits<MemoryTraits>::value);
|
||||
|
||||
using traits_type =
|
||||
Kokkos::ViewTraits<data_type, array_layout,
|
||||
@ -1653,8 +1653,17 @@ KOKKOS_FUNCTION auto as_view_of_rank_n(
|
||||
Kokkos::abort("Converting DynRankView to a View of mis-matched rank!");)
|
||||
}
|
||||
|
||||
return View<typename RankDataType<T, N>::type, Args...>(
|
||||
v.data(), v.impl_map().layout());
|
||||
auto layout = v.impl_map().layout();
|
||||
|
||||
if constexpr (std::is_same_v<decltype(layout), Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<decltype(layout), Kokkos::LayoutRight> ||
|
||||
std::is_same_v<decltype(layout), Kokkos::LayoutStride> ||
|
||||
is_layouttiled<decltype(layout)>::value) {
|
||||
for (int i = N; i < 7; ++i)
|
||||
layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG;
|
||||
}
|
||||
|
||||
return View<typename RankDataType<T, N>::type, Args...>(v.data(), layout);
|
||||
}
|
||||
|
||||
template <typename Function, typename... Args>
|
||||
|
||||
@ -124,15 +124,8 @@ KOKKOS_INLINE_FUNCTION void offsetview_verify_operator_bounds(
|
||||
args...);
|
||||
Kokkos::Impl::throw_runtime_exception(std::string(buffer));))
|
||||
|
||||
KOKKOS_IF_ON_DEVICE((
|
||||
/* Check #1: is there a SharedAllocationRecord?
|
||||
(we won't use it, but if it is not there then there isn't
|
||||
a corresponding SharedAllocationHeader containing a label).
|
||||
This check should cover the case of Views that don't
|
||||
have the Unmanaged trait but were initialized by pointer. */
|
||||
if (tracker.has_record()) {
|
||||
Kokkos::Impl::operator_bounds_error_on_device(map);
|
||||
} else { Kokkos::abort("OffsetView bounds error"); }))
|
||||
KOKKOS_IF_ON_DEVICE(
|
||||
(Kokkos::abort("OffsetView bounds error"); (void)tracker;))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user