From b79c0bc7b41b88a162b4901e18d0fe3024057281 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 5 May 2022 11:44:47 -0600 Subject: [PATCH] Update Kokkos library in LAMMPS to v3.6.0 --- lib/kokkos/BUILD.md | 6 +- lib/kokkos/CHANGELOG.md | 115 + lib/kokkos/CMakeLists.txt | 38 +- lib/kokkos/Makefile.kokkos | 93 +- lib/kokkos/Makefile.targets | 4 +- lib/kokkos/README.md | 10 +- lib/kokkos/Spack.md | 30 +- lib/kokkos/algorithms/src/CMakeLists.txt | 1 + lib/kokkos/algorithms/src/Kokkos_Random.hpp | 421 +-- lib/kokkos/algorithms/src/Kokkos_Sort.hpp | 51 +- .../algorithms/src/Kokkos_StdAlgorithms.hpp | 102 + .../src/std_algorithms/Kokkos_BeginEnd.hpp | 105 + .../src/std_algorithms/Kokkos_Constraints.hpp | 237 ++ .../src/std_algorithms/Kokkos_Distance.hpp | 69 + .../Kokkos_HelperPredicates.hpp | 120 + .../Kokkos_MinMaxElementOperations.hpp | 409 +++ .../Kokkos_ModifyingOperations.hpp | 113 + .../Kokkos_ModifyingSequenceOperations.hpp | 51 + .../Kokkos_NonModifyingSequenceOperations.hpp | 2406 +++++++++++++++++ .../src/std_algorithms/Kokkos_Numeric.hpp | 59 + .../Kokkos_PartitioningOperations.hpp | 491 ++++ .../Kokkos_RandomAccessIterator.hpp | 194 ++ ...cerWithArbitraryJoinerNoNeutralElement.hpp | 118 + .../Kokkos_SortingOperations.hpp | 378 +++ ...Kokkos_ValueWrapperForNoNeutralElement.hpp | 78 + ...Kokkos_ModifyingSequenceOperationsSet1.hpp | 1285 +++++++++ ...Kokkos_ModifyingSequenceOperationsSet2.hpp | 1783 ++++++++++++ .../numeric/Kokkos_AdjacentDifference.hpp | 279 ++ .../numeric/Kokkos_ExclusiveScan.hpp | 517 ++++ .../Kokkos_IdentityReferenceUnaryFunctor.hpp | 64 + .../numeric/Kokkos_InclusiveScan.hpp | 699 +++++ .../std_algorithms/numeric/Kokkos_Reduce.hpp | 326 +++ .../numeric/Kokkos_TransformReduce.hpp | 488 ++++ .../algorithms/unit_tests/CMakeLists.txt | 191 +- lib/kokkos/algorithms/unit_tests/Makefile | 2 +- .../algorithms/unit_tests/TestRandom.hpp | 170 +- .../unit_tests/TestRandomAccessIterator.cpp | 252 ++ 
lib/kokkos/algorithms/unit_tests/TestSort.hpp | 12 +- .../TestStdAlgorithmsAdjacentDifference.cpp | 293 ++ .../TestStdAlgorithmsAdjacentFind.cpp | 325 +++ .../TestStdAlgorithmsAllAnyNoneOf.cpp | 183 ++ .../unit_tests/TestStdAlgorithmsCommon.cpp | 57 + .../unit_tests/TestStdAlgorithmsCommon.hpp | 255 ++ .../TestStdAlgorithmsCompileOnly.cpp | 553 ++++ .../TestStdAlgorithmsConstraints.cpp | 113 + .../unit_tests/TestStdAlgorithmsCopyIf.cpp | 308 +++ .../unit_tests/TestStdAlgorithmsCount.cpp | 142 + .../unit_tests/TestStdAlgorithmsEqual.cpp | 150 + .../TestStdAlgorithmsExclusiveScan.cpp | 381 +++ .../unit_tests/TestStdAlgorithmsFind.cpp | 191 ++ .../unit_tests/TestStdAlgorithmsFindEnd.cpp | 387 +++ .../TestStdAlgorithmsFindFirstOf.cpp | 303 +++ .../unit_tests/TestStdAlgorithmsForEach.cpp | 167 ++ .../TestStdAlgorithmsHelperFunctors.hpp | 188 ++ .../TestStdAlgorithmsInclusiveScan.cpp | 390 +++ .../unit_tests/TestStdAlgorithmsIsSorted.cpp | 222 ++ .../TestStdAlgorithmsIsSortedUntil.cpp | 225 ++ ...estStdAlgorithmsLexicographicalCompare.cpp | 184 ++ .../TestStdAlgorithmsMinMaxElementOps.cpp | 492 ++++ .../unit_tests/TestStdAlgorithmsMismatch.cpp | 228 ++ .../unit_tests/TestStdAlgorithmsModOps.cpp | 209 ++ .../unit_tests/TestStdAlgorithmsModSeqOps.cpp | 429 +++ .../unit_tests/TestStdAlgorithmsNumerics.cpp | 716 +++++ .../TestStdAlgorithmsPartitionCopy.cpp | 303 +++ .../TestStdAlgorithmsPartitioningOps.cpp | 258 ++ .../unit_tests/TestStdAlgorithmsRemove.cpp | 234 ++ .../TestStdAlgorithmsRemoveCopy.cpp | 265 ++ .../TestStdAlgorithmsRemoveCopyIf.cpp | 247 ++ .../unit_tests/TestStdAlgorithmsRemoveIf.cpp | 231 ++ .../unit_tests/TestStdAlgorithmsReplace.cpp | 255 ++ .../TestStdAlgorithmsReplaceCopy.cpp | 299 ++ .../TestStdAlgorithmsReplaceCopyIf.cpp | 300 ++ .../unit_tests/TestStdAlgorithmsReplaceIf.cpp | 257 ++ .../unit_tests/TestStdAlgorithmsReverse.cpp | 180 ++ .../unit_tests/TestStdAlgorithmsRotate.cpp | 275 ++ .../TestStdAlgorithmsRotateCopy.cpp | 275 ++ 
.../TestStdAlgorithmsScalarRedVsView.cpp | 235 ++ .../unit_tests/TestStdAlgorithmsSearch.cpp | 364 +++ .../unit_tests/TestStdAlgorithmsSearch_n.cpp | 336 +++ .../unit_tests/TestStdAlgorithmsShiftLeft.cpp | 243 ++ .../TestStdAlgorithmsShiftRight.cpp | 247 ++ ...estStdAlgorithmsTransformExclusiveScan.cpp | 320 +++ ...estStdAlgorithmsTransformInclusiveScan.cpp | 347 +++ .../TestStdAlgorithmsTransformUnaryOp.cpp | 174 ++ .../unit_tests/TestStdAlgorithmsUnique.cpp | 312 +++ .../TestStdAlgorithmsUniqueCopy.cpp | 361 +++ .../algorithms/unit_tests/TestStdReducers.cpp | 281 ++ lib/kokkos/benchmarks/atomic/main.cpp | 6 +- .../bytes_and_flops/bench_double.cpp | 48 + .../bytes_and_flops/bench_float.cpp | 48 + .../bytes_and_flops/bench_int32_t.cpp | 48 + .../bytes_and_flops/bench_int64_t.cpp | 48 + .../benchmarks/bytes_and_flops/main.cpp | 21 +- lib/kokkos/bin/hpcbind | 6 +- lib/kokkos/bin/kokkos_launch_compiler | 8 +- lib/kokkos/bin/nvcc_wrapper | 17 +- lib/kokkos/cmake/KokkosConfig.cmake.in | 2 + lib/kokkos/cmake/KokkosConfigCommon.cmake.in | 4 + lib/kokkos/cmake/KokkosCore_config.h.in | 8 +- lib/kokkos/cmake/Modules/FindTPLCUDA.cmake | 2 +- lib/kokkos/cmake/Modules/FindTPLHPX.cmake | 2 +- lib/kokkos/cmake/Modules/FindTPLPTHREAD.cmake | 20 - lib/kokkos/cmake/Modules/FindTPLTHREADS.cmake | 15 + .../compile_tests/cuda_compute_capability.cc | 1 + lib/kokkos/cmake/kokkos_arch.cmake | 92 +- lib/kokkos/cmake/kokkos_compiler_id.cmake | 77 +- lib/kokkos/cmake/kokkos_enable_devices.cmake | 12 +- lib/kokkos/cmake/kokkos_enable_options.cmake | 2 + lib/kokkos/cmake/kokkos_tpls.cmake | 15 +- lib/kokkos/cmake/kokkos_tribits.cmake | 29 +- .../performance_tests/CMakeLists.txt | 9 +- .../containers/performance_tests/Makefile | 2 +- lib/kokkos/containers/src/CMakeLists.txt | 6 +- lib/kokkos/containers/src/Kokkos_Bitset.hpp | 72 +- lib/kokkos/containers/src/Kokkos_DualView.hpp | 118 +- .../containers/src/Kokkos_DynRankView.hpp | 423 +-- .../containers/src/Kokkos_DynamicView.hpp | 21 +- 
.../containers/src/Kokkos_ErrorReporter.hpp | 18 +- .../containers/src/Kokkos_OffsetView.hpp | 158 +- .../containers/src/Kokkos_ScatterView.hpp | 132 +- .../containers/src/Kokkos_UnorderedMap.hpp | 104 +- .../src/impl/Kokkos_Functional_impl.hpp | 4 +- .../impl/Kokkos_StaticCrsGraph_factory.hpp | 2 + .../src/impl/Kokkos_UnorderedMap_impl.hpp | 16 +- .../containers/unit_tests/CMakeLists.txt | 15 +- lib/kokkos/containers/unit_tests/Makefile | 2 +- .../containers/unit_tests/TestDualView.hpp | 31 +- .../containers/unit_tests/TestDynViewAPI.hpp | 300 +- .../containers/unit_tests/TestDynamicView.hpp | 6 +- .../unit_tests/TestErrorReporter.hpp | 13 +- .../containers/unit_tests/TestOffsetView.hpp | 6 +- .../containers/unit_tests/TestScatterView.hpp | 304 ++- .../unit_tests/TestStaticCrsGraph.hpp | 4 +- .../unit_tests/TestUnorderedMap.hpp | 4 +- .../containers/unit_tests/TestVector.hpp | 8 +- .../unit_tests/TestWithoutInitializing.hpp | 183 ++ lib/kokkos/core/CMakeLists.txt | 2 - .../PerfTest_ExecSpacePartitioning.cpp | 102 +- lib/kokkos/core/src/CMakeLists.txt | 13 +- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 37 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp | 1012 ------- .../src/Cuda/Kokkos_Cuda_Half_Conversion.hpp | 573 ++++ .../src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp | 80 + .../core/src/Cuda/Kokkos_Cuda_Instance.cpp | 115 +- .../core/src/Cuda/Kokkos_Cuda_Instance.hpp | 38 +- .../src/Cuda/Kokkos_Cuda_KernelLaunch.hpp | 5 +- .../core/src/Cuda/Kokkos_Cuda_Locks.cpp | 20 +- .../core/src/Cuda/Kokkos_Cuda_Locks.hpp | 1 - .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 448 +-- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 37 +- .../core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp | 118 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 55 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Abort.hpp | 4 +- .../HIP/Kokkos_HIP_BlockSize_Deduction.hpp | 5 + .../src/HIP/Kokkos_HIP_Half_Conversion.hpp | 248 ++ .../src/HIP/Kokkos_HIP_Half_Impl_Type.hpp | 63 + 
.../core/src/HIP/Kokkos_HIP_Instance.cpp | 89 +- .../core/src/HIP/Kokkos_HIP_Instance.hpp | 37 +- .../core/src/HIP/Kokkos_HIP_KernelLaunch.hpp | 15 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Locks.cpp | 19 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Locks.hpp | 1 - .../src/HIP/Kokkos_HIP_Parallel_MDRange.hpp | 100 +- .../src/HIP/Kokkos_HIP_Parallel_Range.hpp | 13 +- .../core/src/HIP/Kokkos_HIP_Parallel_Team.hpp | 162 +- .../core/src/HIP/Kokkos_HIP_ReduceScan.hpp | 4 - .../src/HIP/Kokkos_HIP_Shuffle_Reduce.hpp | 6 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp | 27 +- .../core/src/HIP/Kokkos_HIP_UniqueToken.hpp | 117 +- .../core/src/HIP/Kokkos_HIP_Vectorization.hpp | 31 +- lib/kokkos/core/src/HPX/Kokkos_HPX.cpp | 67 +- .../Kokkos_HPX_ChunkedRoundRobinExecutor.hpp | 208 -- lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp | 177 +- .../src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp | 44 +- lib/kokkos/core/src/Kokkos_Array.hpp | 29 +- lib/kokkos/core/src/Kokkos_Atomic.hpp | 6 +- .../core/src/Kokkos_Atomics_Desul_Config.hpp | 58 + .../Kokkos_Atomics_Desul_Volatile_Wrapper.hpp | 109 +- .../core/src/Kokkos_Atomics_Desul_Wrapper.hpp | 122 +- lib/kokkos/core/src/Kokkos_Concepts.hpp | 34 +- lib/kokkos/core/src/Kokkos_CopyViews.hpp | 468 ++-- lib/kokkos/core/src/Kokkos_Core.hpp | 22 +- lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 95 +- lib/kokkos/core/src/Kokkos_Crs.hpp | 7 +- lib/kokkos/core/src/Kokkos_Cuda.hpp | 2 +- lib/kokkos/core/src/Kokkos_GraphNode.hpp | 7 +- lib/kokkos/core/src/Kokkos_HBWSpace.hpp | 70 +- lib/kokkos/core/src/Kokkos_HIP.hpp | 2 + lib/kokkos/core/src/Kokkos_HIP_Space.hpp | 2 +- lib/kokkos/core/src/Kokkos_HPX.hpp | 982 +++---- lib/kokkos/core/src/Kokkos_Half.hpp | 921 ++++++- lib/kokkos/core/src/Kokkos_HostSpace.hpp | 48 +- lib/kokkos/core/src/Kokkos_LogicalSpaces.hpp | 18 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 92 +- .../core/src/Kokkos_MathematicalConstants.hpp | 85 + .../core/src/Kokkos_MathematicalFunctions.hpp | 147 +- .../Kokkos_MathematicalSpecialFunctions.hpp | 67 +- 
lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 24 +- lib/kokkos/core/src/Kokkos_MinMaxClamp.hpp | 229 ++ lib/kokkos/core/src/Kokkos_NumericTraits.hpp | 99 +- lib/kokkos/core/src/Kokkos_OpenMP.hpp | 7 +- .../core/src/Kokkos_OpenMPTargetSpace.hpp | 13 +- lib/kokkos/core/src/Kokkos_Pair.hpp | 4 +- lib/kokkos/core/src/Kokkos_Parallel.hpp | 27 + .../core/src/Kokkos_Parallel_Reduce.hpp | 855 +++++- .../src/Kokkos_Profiling_ProfileSection.hpp | 19 +- lib/kokkos/core/src/Kokkos_Serial.hpp | 2 +- lib/kokkos/core/src/Kokkos_TaskScheduler.hpp | 10 - lib/kokkos/core/src/Kokkos_Threads.hpp | 28 +- lib/kokkos/core/src/Kokkos_Timer.hpp | 43 +- lib/kokkos/core/src/Kokkos_UniqueToken.hpp | 4 +- lib/kokkos/core/src/Kokkos_View.hpp | 231 +- .../core/src/OpenMP/Kokkos_OpenMP_Exec.cpp | 11 +- .../core/src/OpenMP/Kokkos_OpenMP_Exec.hpp | 100 +- .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 2 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 7 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp | 325 ++- .../Kokkos_OpenMPTarget_Parallel.hpp | 77 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp | 4 - .../OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp | 24 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp | 11 +- .../core/src/SYCL/Kokkos_SYCL_Abort.hpp | 9 +- .../core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp | 4 +- .../src/SYCL/Kokkos_SYCL_Half_Conversion.hpp | 159 ++ .../src/SYCL/Kokkos_SYCL_Half_Impl_Type.hpp | 67 + .../core/src/SYCL/Kokkos_SYCL_Instance.cpp | 63 +- .../core/src/SYCL/Kokkos_SYCL_Instance.hpp | 139 +- .../src/SYCL/Kokkos_SYCL_Parallel_Range.hpp | 120 +- .../src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp | 531 +++- .../src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp | 192 +- .../src/SYCL/Kokkos_SYCL_Parallel_Team.hpp | 458 ++-- .../core/src/SYCL/Kokkos_SYCL_Space.cpp | 47 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp | 228 +- .../core/src/SYCL/Kokkos_SYCL_UniqueToken.hpp | 111 +- .../core/src/Threads/Kokkos_ThreadsExec.cpp | 72 +- .../core/src/Threads/Kokkos_ThreadsExec.hpp | 63 +- 
.../src/Threads/Kokkos_ThreadsExec_base.cpp | 145 +- .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 279 +- .../src/Threads/Kokkos_Threads_Parallel.hpp | 20 + .../core/src/decl/Kokkos_Declare_CUDA.hpp | 2 + .../core/src/decl/Kokkos_Declare_SYCL.hpp | 2 + lib/kokkos/core/src/desul/.clang-format | 1 + lib/kokkos/core/src/desul/atomics/CUDA.hpp | 162 +- lib/kokkos/core/src/desul/atomics/Common.hpp | 2 + .../src/desul/atomics/Compare_Exchange.hpp | 2 + .../desul/atomics/Compare_Exchange_CUDA.hpp | 5 +- .../desul/atomics/Compare_Exchange_MSVC.hpp | 298 +- .../desul/atomics/Compare_Exchange_OpenMP.hpp | 50 +- .../desul/atomics/Compare_Exchange_SYCL.hpp | 203 +- .../atomics/Compare_Exchange_ScopeCaller.hpp | 43 + lib/kokkos/core/src/desul/atomics/Generic.hpp | 258 +- lib/kokkos/core/src/desul/atomics/HIP.hpp | 505 ++-- lib/kokkos/core/src/desul/atomics/Macros.hpp | 28 +- lib/kokkos/core/src/desul/atomics/SYCL.hpp | 213 +- .../src/desul/atomics/SYCLConversions.hpp | 95 +- ...da_cc7_asm_atomic_fetch_op.inc_forceglobal | 18 +- .../cuda_cc7_asm_atomic_fetch_op.inc_generic | 19 +- .../cuda_cc7_asm_atomic_fetch_op.inc_isglobal | 26 +- ...cuda_cc7_asm_atomic_fetch_op.inc_predicate | 34 +- .../core/src/fwd/Kokkos_Fwd_THREADS.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_BitOps.hpp | 16 +- .../src/impl/Kokkos_Command_Line_Parsing.cpp | 133 + .../src/impl/Kokkos_Command_Line_Parsing.hpp | 63 + lib/kokkos/core/src/impl/Kokkos_Core.cpp | 221 +- lib/kokkos/core/src/impl/Kokkos_Error.cpp | 35 +- lib/kokkos/core/src/impl/Kokkos_Error.hpp | 1 + .../core/src/impl/Kokkos_FunctorAdapter.hpp | 15 - lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp | 8 +- .../core/src/impl/Kokkos_HostBarrier.cpp | 2 +- .../core/src/impl/Kokkos_HostBarrier.hpp | 13 +- .../core/src/impl/Kokkos_HostSharedPtr.hpp | 55 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 6 +- .../src/impl/Kokkos_HostSpace_deepcopy.cpp | 41 +- .../src/impl/Kokkos_HostSpace_deepcopy.hpp | 10 + .../core/src/impl/Kokkos_HostThreadTeam.hpp | 318 
+-- .../core/src/impl/Kokkos_Memory_Fence.hpp | 4 +- .../core/src/impl/Kokkos_NumericTraits.cpp | 60 +- lib/kokkos/core/src/impl/Kokkos_Profiling.cpp | 349 ++- lib/kokkos/core/src/impl/Kokkos_Profiling.hpp | 468 +--- .../src/impl/Kokkos_Profiling_C_Interface.h | 2 +- .../src/impl/Kokkos_Profiling_Interface.hpp | 24 +- .../core/src/impl/Kokkos_SharedAlloc.cpp | 6 +- .../core/src/impl/Kokkos_SharedAlloc.hpp | 152 +- .../src/impl/Kokkos_SharedAlloc_timpl.hpp | 26 +- lib/kokkos/core/src/impl/Kokkos_Spinwait.cpp | 7 +- lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp | 2 +- .../core/src/impl/Kokkos_Tools_Generic.hpp | 493 ++++ lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 25 - lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp | 3 +- lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp | 12 + .../core/src/impl/Kokkos_ViewMapping.hpp | 114 +- .../core/src/impl/Kokkos_ViewTracker.hpp | 38 +- .../core/src/setup/Kokkos_Setup_Cuda.hpp | 3 - .../core/src/setup/Kokkos_Setup_SYCL.hpp | 17 +- .../src/traits/Kokkos_ExecutionSpaceTrait.hpp | 1 + .../src/traits/Kokkos_GraphKernelTrait.hpp | 1 + .../core/src/traits/Kokkos_IndexTypeTrait.hpp | 1 + .../traits/Kokkos_IterationPatternTrait.hpp | 1 + .../src/traits/Kokkos_LaunchBoundsTrait.hpp | 1 + .../traits/Kokkos_OccupancyControlTrait.hpp | 1 + .../core/src/traits/Kokkos_ScheduleTrait.hpp | 1 + .../core/src/traits/Kokkos_Traits_fwd.hpp | 9 + .../traits/Kokkos_WorkItemPropertyTrait.hpp | 1 + .../core/src/traits/Kokkos_WorkTagTrait.hpp | 1 + lib/kokkos/core/unit_test/CMakeLists.txt | 92 +- lib/kokkos/core/unit_test/Makefile | 18 +- lib/kokkos/core/unit_test/TestAggregate.hpp | 10 +- .../core/unit_test/TestAtomicOperations.hpp | 167 ++ .../unit_test/TestAtomicOperations_shared.hpp | 86 + .../TestAtomicOperations_unsignedint.hpp | 8 + .../TestAtomicOperations_unsignedlongint.hpp | 8 + lib/kokkos/core/unit_test/TestAtomicViews.hpp | 36 +- lib/kokkos/core/unit_test/TestCTestDevice.cpp | 37 +- 
lib/kokkos/core/unit_test/TestComplex.hpp | 15 +- .../core/unit_test/TestDeepCopyAlignment.hpp | 3 +- .../unit_test/TestDefaultDeviceTypeInit.hpp | 22 +- .../core/unit_test/TestHalfConversion.hpp | 40 +- .../core/unit_test/TestHalfOperators.hpp | 446 +-- lib/kokkos/core/unit_test/TestMDRange.hpp | 7 + lib/kokkos/core/unit_test/TestMDRange_g.hpp | 111 + .../unit_test/TestMathematicalConstants.hpp | 151 ++ .../unit_test/TestMathematicalFunctions.hpp | 100 +- .../TestMathematicalSpecialFunctions.hpp | 4 + lib/kokkos/core/unit_test/TestMinMaxClamp.hpp | 333 +++ .../core/unit_test/TestNumericTraits.hpp | 180 ++ lib/kokkos/core/unit_test/TestOther.hpp | 3 + .../core/unit_test/TestPolicyConstruction.hpp | 87 +- lib/kokkos/core/unit_test/TestRealloc.hpp | 164 ++ lib/kokkos/core/unit_test/TestReduce.hpp | 39 +- lib/kokkos/core/unit_test/TestReducers.hpp | 39 +- lib/kokkos/core/unit_test/TestReducers_d.hpp | 46 + lib/kokkos/core/unit_test/TestReducers_e.hpp | 52 + .../unit_test/TestReductions_DeviceView.hpp | 5 + lib/kokkos/core/unit_test/TestSharedAlloc.hpp | 27 +- lib/kokkos/core/unit_test/TestTeam.hpp | 14 +- lib/kokkos/core/unit_test/TestTeamBasic.hpp | 31 + lib/kokkos/core/unit_test/TestTeamScan.hpp | 62 +- .../core/unit_test/TestTeamTeamSize.hpp | 45 +- lib/kokkos/core/unit_test/TestUtilities.hpp | 4 +- lib/kokkos/core/unit_test/TestViewAPI.hpp | 27 +- .../TestViewLayoutStrideAssignment.hpp | 2 + .../core/unit_test/TestViewMapping_a.hpp | 331 +-- .../core/unit_test/TestViewMapping_b.hpp | 20 +- .../unit_test/TestViewMapping_subview.hpp | 26 +- lib/kokkos/core/unit_test/TestViewResize.hpp | 6 + lib/kokkos/core/unit_test/TestViewSubview.hpp | 48 +- .../unit_test/TestWithoutInitializing.hpp | 98 + .../cuda/TestCuda_ReducerViewSizeLimit.cpp | 195 ++ .../default/TestDefaultDeviceTypeViewAPI.cpp | 152 ++ .../headers_self_contained/CMakeLists.txt | 1 + .../hpx/TestHPX_IndependentInstances.cpp | 4 +- ...X_IndependentInstancesDelayedExecution.cpp | 2 +- 
...estHPX_IndependentInstancesInstanceIds.cpp | 40 +- .../incremental/Test01_execspace.hpp | 2 +- .../Test04_ParallelFor_RangePolicy.hpp | 1 + .../Test06_ParallelFor_MDRangePolicy.hpp | 3 + .../openmp/TestOpenMP_PartitionMaster.cpp | 2 + .../sycl/TestSYCL_InterOp_Streams.cpp | 5 +- .../unit_test/tools/TestBuiltinTuners.cpp | 5 +- .../core/unit_test/tools/TestCInterface.c | 2 +- .../unit_test/tools/TestEventCorrectness.cpp | 1 - .../unit_test/tools/TestEventCorrectness.hpp | 408 ++- .../core/unit_test/tools/TestIndependence.cpp | 58 + .../unit_test/tools/TestLogicalSpaces.hpp | 1 + .../unit_test/tools/TestProfilingSection.cpp | 143 + .../tools/TestWithoutInitializing.cpp | 77 + .../tools/include/ToolTestingUtilities.hpp | 1305 +++++++++ lib/kokkos/example/CMakeLists.txt | 1 - .../build_cmake_in_tree/CMakeLists.txt | 4 +- .../build_cmake_in_tree/cmake_example.cpp | 2 +- .../build_cmake_installed/CMakeLists.txt | 4 +- .../bar.cpp | 43 + .../CMakeLists.txt | 18 + .../bar.cpp | 46 + .../cmake_example.cpp | 90 + .../foo.f | 4 + lib/kokkos/example/tutorial/CMakeLists.txt | 2 - lib/kokkos/generate_makefile.bash | 8 +- lib/kokkos/gnu_generate_makefile.bash | 6 +- lib/kokkos/master_history.txt | 1 + 380 files changed, 41928 insertions(+), 8786 deletions(-) create mode 100644 lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingSequenceOperations.hpp create mode 100644 
lib/kokkos/algorithms/src/std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_Numeric.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitioningOperations.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_RandomAccessIterator.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReducerWithArbitraryJoinerNoNeutralElement.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_SortingOperations.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/Kokkos_ValueWrapperForNoNeutralElement.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet1.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/modifying_sequence_ops/Kokkos_ModifyingSequenceOperationsSet2.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_ExclusiveScan.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_IdentityReferenceUnaryFunctor.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_InclusiveScan.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_Reduce.hpp create mode 100644 lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_TransformReduce.hpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp create mode 
100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCount.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsEqual.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFind.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsHelperFunctors.hpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSorted.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp create mode 100644 
lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsScalarRedVsView.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestStdReducers.cpp create mode 100644 lib/kokkos/benchmarks/bytes_and_flops/bench_double.cpp create mode 100644 lib/kokkos/benchmarks/bytes_and_flops/bench_float.cpp create mode 100644 lib/kokkos/benchmarks/bytes_and_flops/bench_int32_t.cpp create mode 100644 
lib/kokkos/benchmarks/bytes_and_flops/bench_int64_t.cpp delete mode 100644 lib/kokkos/cmake/Modules/FindTPLPTHREAD.cmake create mode 100644 lib/kokkos/cmake/Modules/FindTPLTHREADS.cmake create mode 100644 lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp delete mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Conversion.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Half_Impl_Type.hpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Conversion.hpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_Half_Impl_Type.hpp delete mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX_ChunkedRoundRobinExecutor.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp create mode 100644 lib/kokkos/core/src/Kokkos_MathematicalConstants.hpp create mode 100644 lib/kokkos/core/src/Kokkos_MinMaxClamp.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Conversion.hpp create mode 100644 lib/kokkos/core/src/SYCL/Kokkos_SYCL_Half_Impl_Type.hpp create mode 100644 lib/kokkos/core/src/desul/atomics/Compare_Exchange_ScopeCaller.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Command_Line_Parsing.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Tools_Generic.hpp create mode 100644 lib/kokkos/core/unit_test/TestAtomicOperations_shared.hpp create mode 100644 lib/kokkos/core/unit_test/TestMDRange_g.hpp create mode 100644 lib/kokkos/core/unit_test/TestMathematicalConstants.hpp create mode 100644 lib/kokkos/core/unit_test/TestMinMaxClamp.hpp create mode 100644 lib/kokkos/core/unit_test/TestRealloc.hpp create mode 100644 lib/kokkos/core/unit_test/TestReducers_e.hpp create mode 100644 lib/kokkos/core/unit_test/TestWithoutInitializing.hpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_ReducerViewSizeLimit.cpp create mode 100644 
lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeViewAPI.cpp create mode 100644 lib/kokkos/core/unit_test/tools/TestIndependence.cpp create mode 100644 lib/kokkos/core/unit_test/tools/TestProfilingSection.cpp create mode 100644 lib/kokkos/core/unit_test/tools/TestWithoutInitializing.cpp create mode 100644 lib/kokkos/core/unit_test/tools/include/ToolTestingUtilities.hpp create mode 100644 lib/kokkos/example/build_cmake_installed_kk_as_language/CMakeLists.txt create mode 100644 lib/kokkos/example/build_cmake_installed_kk_as_language/bar.cpp create mode 100644 lib/kokkos/example/build_cmake_installed_kk_as_language/cmake_example.cpp create mode 100644 lib/kokkos/example/build_cmake_installed_kk_as_language/foo.f diff --git a/lib/kokkos/BUILD.md b/lib/kokkos/BUILD.md index bb1a31f266..114baf99f1 100644 --- a/lib/kokkos/BUILD.md +++ b/lib/kokkos/BUILD.md @@ -119,8 +119,8 @@ Device backends can be enabled by specifying `-DKokkos_ENABLE_X`. * Kokkos_ENABLE_OPENMP * Whether to build OpenMP backend * BOOL Default: OFF -* Kokkos_ENABLE_PTHREAD - * Whether to build Pthread backend +* Kokkos_ENABLE_THREADS + * Whether to build C++ thread backend * BOOL Default: OFF * Kokkos_ENABLE_SERIAL * Whether to build serial backend @@ -178,7 +178,7 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`. 
* Whether to print information about which profiling tools gotloaded * BOOL Default: OFF * Kokkos_ENABLE_TESTS - * Whether to build serial backend + * Whether to enable test suite * BOOL Default: OFF ## Other Options diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 2e779791dd..dfbe22edde 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,120 @@ # Change Log +## [3.6.00](https://github.com/kokkos/kokkos/tree/3.6.00) (2022-02-18) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.5.00...3.6.00) + +### Features: +- Add C++ standard algorithms [\#4315](https://github.com/kokkos/kokkos/pull/4315) +- Implement `fill_random` for `DynRankView` [\#4763](https://github.com/kokkos/kokkos/pull/4763) +- Add `bhalf_t` [\#4543](https://github.com/kokkos/kokkos/pull/4543) [\#4653](https://github.com/kokkos/kokkos/pull/4653) +- Add mathematical constants [\#4519](https://github.com/kokkos/kokkos/pull/4519) +- Allow `Kokkos::{create_mirror*,resize,realloc}` to be used with `WithoutInitializing` [\#4486](https://github.com/kokkos/kokkos/pull/4486) [\#4337](https://github.com/kokkos/kokkos/pull/4337) +- Implement `KOKKOS_IF_ON_{HOST,DEVICE}` macros [\#4660](https://github.com/kokkos/kokkos/pull/4660) +- Allow setting the CMake language for Kokkos [\#4323](https://github.com/kokkos/kokkos/pull/4323) + +#### Perf bug fix +- Desul: Add ScopeCaller [\#4690](https://github.com/kokkos/kokkos/pull/4690) +- Enable Desul atomics by default when using Makefiles [\#4606](https://github.com/kokkos/kokkos/pull/4606) +- Unique token improvement [\#4741](https://github.com/kokkos/kokkos/pull/4741) [\#4748](https://github.com/kokkos/kokkos/pull/4748) + +#### Other improvements: +- Add math function long double overload on the host side [\#4712](https://github.com/kokkos/kokkos/pull/4712) + +### Deprecations: +- Array reductions with pointer return types [\#4756](https://github.com/kokkos/kokkos/pull/4756) +- Deprecate `partition_master`, 
`validate_partition` [\#4737](https://github.com/kokkos/kokkos/pull/4737) +- Deprecate `Kokkos_ENABLE_PTHREAD` in favor of `Kokkos_ENABLE_THREADS` [\#4619](https://github.com/kokkos/kokkos/pull/4619) ** pair with use std::threads ** +- Deprecate `log2(unsigned) -> int` (removing in next release) [\#4595](https://github.com/kokkos/kokkos/pull/4595) +- Deprecate `Kokkos::Impl::is_view` [\#4592](https://github.com/kokkos/kokkos/pull/4592) +- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros and the `ActiveExecutionMemorySpace` alias [\#4668](https://github.com/kokkos/kokkos/issues/4668) + +### Backends and Archs Enhancements: + +#### SYCL: +- Update required SYCL compiler version [\#4749](https://github.com/kokkos/kokkos/pull/4749) +- Cap vector size to kernel maximum for SYCL [\#4704](https://github.com/kokkos/kokkos/pull/4704) +- Improve check for compatibility of vector size and subgroup size in SYCL [\#4579](https://github.com/kokkos/kokkos/pull/4579) +- Provide `chunk_size` for SYCL [\#4635](https://github.com/kokkos/kokkos/pull/4635) +- Use host-pinned memory for SYCL kernel memory [\#4627](https://github.com/kokkos/kokkos/pull/4627) +- Use shuffle-based algorithm for scalar reduction [\#4608](https://github.com/kokkos/kokkos/pull/4608) +- Implement pool of USM IndirectKernelMemory [\#4596](https://github.com/kokkos/kokkos/pull/4596) +- Provide valid default team size for SYCL [\#4481](https://github.com/kokkos/kokkos/pull/4481) + +#### CUDA: +- Add checks for shmem usage in `parallel_reduce` [\#4548](https://github.com/kokkos/kokkos/pull/4548) + +#### HIP: +- Add support for fp16 in the HIP backend [\#4688](https://github.com/kokkos/kokkos/pull/4688) +- Disable multiple kernel instantiations when using HIP (configure with `-DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON` to use) [\#4644](https://github.com/kokkos/kokkos/pull/4644) +- Fix HIP scratch use per instance [\#4439](https://github.com/kokkos/kokkos/pull/4439) +- Change allocation header 
to 256B alignment for AMD VEGA architecture [\#4753](https://github.com/kokkos/kokkos/pull/4753) +- Add generic `KOKKOS_ARCH_VEGA` macro [\#4782](https://github.com/kokkos/kokkos/pull/4782) +- Require ROCm 4.5 [\#4689](https://github.com/kokkos/kokkos/pull/4689) + +#### HPX: +- Adapt to HPX 1.7.0 which is now required [\#4241](https://github.com/kokkos/kokkos/pull/4241) + +#### OpenMP: +- Fix thread deduction for OpenMP for `thread_count==0` [\#4541](https://github.com/kokkos/kokkos/pull/4541) + +#### OpenMPTarget: +- Update memory space `size_type` to improve performance (`size_t -> unsigned`) [\#4779](https://github.com/kokkos/kokkos/pull/4779) + +#### Other Improvements: +- Improve NVHPC support [\#4599](https://github.com/kokkos/kokkos/pull/4599) +- Add `Kokkos::Experimental::{min,max,minmax,clamp}` [\#4629](https://github.com/kokkos/kokkos/pull/4629) [\#4506](https://github.com/kokkos/kokkos/pull/4506) +- Use device type as template argument in Containers and Algorithms [\#4724](https://github.com/kokkos/kokkos/pull/4724) [\#4675](https://github.com/kokkos/kokkos/pull/4675) +- Implement `Kokkos::sort` with execution space [\#4490](https://github.com/kokkos/kokkos/pull/4490) +- `Kokkos::resize` always errors out for mismatch in runtime rank [\#4681](https://github.com/kokkos/kokkos/pull/4681) +- Print current call stack when calling `Kokkos::abort()` from the host [\#4672](https://github.com/kokkos/kokkos/pull/4672) [\#4671](https://github.com/kokkos/kokkos/pull/4671) +- Detect mismatch of execution spaces in functors [\#4655](https://github.com/kokkos/kokkos/pull/4655) +- Improve view label access on host [\#4647](https://github.com/kokkos/kokkos/pull/4647) +- Error out for `const` scalar return type in reduction [\#4645](https://github.com/kokkos/kokkos/pull/4645) +- Don't allow calling `UnorderedMap::value_at` for a set [\#4639](https://github.com/kokkos/kokkos/pull/4639) +- Add `KOKKOS_COMPILER_NVHPC` macro, disable `quiet_NaN` and `signaling_NaN`
[\#4586](https://github.com/kokkos/kokkos/pull/4586) +- Improve performance of `local_deep_copy` [\#4511](https://github.com/kokkos/kokkos/pull/4511) +- Improve performance when sorting integers [\#4464](https://github.com/kokkos/kokkos/pull/4464) +- Add missing numeric traits (`denorm_min`, `reciprocal_overflow_threshold`, `{quiet,signaling}_NaN`) and make them work on cv-qualified types [\#4466](https://github.com/kokkos/kokkos/pull/4466) [\#4415](https://github.com/kokkos/kokkos/pull/4415) [\#4473](https://github.com/kokkos/kokkos/pull/4473) [\#4443](https://github.com/kokkos/kokkos/pull/4443) + +### Implemented enhancements BuildSystem +- Manually compute IntelLLVM compiler version for older CMake versions [\#4760](https://github.com/kokkos/kokkos/pull/4760) +- Add Xptxas without = to `nvcc_wrapper` [\#4646](https://github.com/kokkos/kokkos/pull/4646) +- Use external GoogleTest optionally [\#4563](https://github.com/kokkos/kokkos/pull/4563) +- Silence warnings about multiple optimization flags with `nvcc_wrapper` [\#4502](https://github.com/kokkos/kokkos/pull/4502) +- Use the same flags in Makefile.kokkos for POWER7/8/9 as for CMake [\#4483](https://github.com/kokkos/kokkos/pull/4483) +- Fix support for A64FX architecture [\#4745](https://github.com/kokkos/kokkos/pull/4745) + +### Incompatibilities: +- Drop `KOKKOS_ARCH_HIP` macro when using generated GNU makefiles [\#4786](https://github.com/kokkos/kokkos/pull/4786) +- Remove gcc-toolchain auto add for clang in Makefile.kokkos [\#4762](https://github.com/kokkos/kokkos/pull/4762) + +### Bug Fixes: +- Lock constant memory in Cuda/HIP kernel launch with a mutex (thread safety) [\#4525](https://github.com/kokkos/kokkos/pull/4525) +- Fix overflow for large requested scratch allocation [\#4551](https://github.com/kokkos/kokkos/pull/4551) +- Fix Windows build in mingw [\#4564](https://github.com/kokkos/kokkos/pull/4564) +- Fix `kokkos_launch_compiler`: escape `$` character
[\#4769](https://github.com/kokkos/kokkos/pull/4769) [\#4703](https://github.com/kokkos/kokkos/pull/4703) +- Fix math functions with NVCC and GCC 5 as host compiler [\#4733](https://github.com/kokkos/kokkos/pull/4733) +- Fix shared build with Intel19 [\#4725](https://github.com/kokkos/kokkos/pull/4725) +- Do not install empty `desul/src/` directory [\#4714](https://github.com/kokkos/kokkos/pull/4714) +- Fix wrong `device_id` computation in `identifier_from_devid` (Profiling Interface) [\#4694](https://github.com/kokkos/kokkos/pull/4694) +- Fix a bug in CUDA scratch memory pool (abnormally high memory consumption) [\#4673](https://github.com/kokkos/kokkos/pull/4673) +- Remove eval of command args in `hpcbind` [\#4630](https://github.com/kokkos/kokkos/pull/4630) +- SYCL fix to run when no GPU is detected [\#4623](https://github.com/kokkos/kokkos/pull/4623) +- Fix `layout_strides::span` for rank-0 views [\#4605](https://github.com/kokkos/kokkos/pull/4605) +- Fix SYCL atomics for local memory [\#4585](https://github.com/kokkos/kokkos/pull/4585) +- Hotfix `mdrange_large_deep_copy` for SYCL [\#4581](https://github.com/kokkos/kokkos/pull/4581) +- Fix bug when sorting integer using the HIP backend [\#4570](https://github.com/kokkos/kokkos/pull/4570) +- Fix compilation error when using HIP with RDC [\#4553](https://github.com/kokkos/kokkos/pull/4553) +- `DynamicView`: Fix deallocation extent [\#4533](https://github.com/kokkos/kokkos/pull/4533) +- SYCL fix running parallel_reduce with TeamPolicy for large ranges [\#4532](https://github.com/kokkos/kokkos/pull/4532) +- Fix bash syntax error in `nvcc_wrapper` [\#4524](https://github.com/kokkos/kokkos/pull/4524) +- OpenMPTarget `team_policy` reduce fixes for `init/join` reductions [\#4521](https://github.com/kokkos/kokkos/pull/4521) +- Avoid hangs in the Threads backend [\#4499](https://github.com/kokkos/kokkos/pull/4499) +- OpenMPTarget fix reduction bug in `parallel_reduce` for `TeamPolicy` 
[\#4491](https://github.com/kokkos/kokkos/pull/4491) +- HIP fix scratch space per instance [\#4439](https://github.com/kokkos/kokkos/pull/4439) +- OpenMPTarget fix team scratch allocation [\#4431](https://github.com/kokkos/kokkos/pull/4431) + + ## [3.5.00](https://github.com/kokkos/kokkos/tree/3.5.00) (2021-10-19) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.01...3.5.00) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 1b6753f983..e1c6893725 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -1,7 +1,7 @@ # Disable in-source builds to prevent source tree corruption. if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) - message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files." ) + message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." ) endif() # We want to determine if options are given with the wrong case @@ -75,7 +75,31 @@ IF(NOT KOKKOS_HAS_TRILINOS) cmake_minimum_required(VERSION 3.16 FATAL_ERROR) set(CMAKE_DISABLE_SOURCE_CHANGES ON) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) + + # What language are we compiling Kokkos as + # downstream dependencies need to match this! + SET(KOKKOS_COMPILE_LANGUAGE CXX) + # use lower case here since we didn't parse options yet + IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) + + # Without this as a language for the package we would get a C++ compiler enabled. + # but we still need a C++ compiler even if we build all our cpp files as CUDA only + # because otherwise the C++ features don't work etc. + # This is just the rather odd way CMake does this, since CUDA doesn't imply C++ even + # though it is a C++ extension ... (but I guess it didn't use to be back in CUDA 4 or 5 + # days. 
+ SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX) + + IF (Kokkos_ENABLE_CUDA) + SET(KOKKOS_COMPILE_LANGUAGE CUDA) + ENDIF() + ENDIF() + IF (Spack_WORKAROUND) + IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) + MESSAGE(FATAL_ERROR "Can't currently use Kokkos_ENABLE_COMPILER_AS_CMAKE_LANGUAGE in a spack installation!") + ENDIF() + #if we are explicitly using Spack for development, #nuke the Spack compiler SET(SPACK_CXX $ENV{SPACK_CXX}) @@ -86,7 +110,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) ENDIF() # Always call the project command to define Kokkos_ variables # and to make sure that C++ is an enabled language - PROJECT(Kokkos CXX) + PROJECT(Kokkos ${KOKKOS_COMPILE_LANGUAGE} ${KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE}) IF(NOT HAS_PARENT) IF (NOT CMAKE_BUILD_TYPE) SET(DEFAULT_BUILD_TYPE "RelWithDebInfo") @@ -111,7 +135,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) -set(Kokkos_VERSION_MINOR 5) +set(Kokkos_VERSION_MINOR 6) set(Kokkos_VERSION_PATCH 00) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") @@ -151,6 +175,10 @@ IF(NOT MSVC) GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -DKOKKOS_DEPENDENCE) ENDIF() +IF(Kokkos_ENABLE_TESTS AND NOT KOKKOS_HAS_TRILINOS) + find_package(GTest) +ENDIF() + # Include a set of Kokkos-specific wrapper functions that # will either call raw CMake or TriBITS # These are functions like KOKKOS_INCLUDE_DIRECTORIES @@ -174,10 +202,6 @@ KOKKOS_SETUP_BUILD_ENVIRONMENT() # 7) Export and install targets OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF) -# Workaround for building position independent code. 
-IF(BUILD_SHARED_LIBS) - SET(CMAKE_POSITION_INDEPENDENT_CODE ON) -ENDIF() SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms) SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 899d6e49a2..aa5f7c98f8 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -11,13 +11,13 @@ CXXFLAGS += $(SHFLAGS) endif KOKKOS_VERSION_MAJOR = 3 -KOKKOS_VERSION_MINOR = 5 +KOKKOS_VERSION_MINOR = 6 KOKKOS_VERSION_PATCH = 00 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) -# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Pthread,Serial +# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial KOKKOS_DEVICES ?= "OpenMP" -#KOKKOS_DEVICES ?= "Pthread" +#KOKKOS_DEVICES ?= "Threads" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKX # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86 @@ -33,8 +33,8 @@ KOKKOS_DEBUG ?= "no" KOKKOS_USE_TPLS ?= "" # Options: c++14,c++1y,c++17,c++1z,c++2a KOKKOS_CXX_STANDARD ?= "c++14" -# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings,enable_desul_atomics -KOKKOS_OPTIONS ?= "enable_desul_atomics" +# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings,disable_desul_atomics +KOKKOS_OPTIONS ?= "" KOKKOS_CMAKE ?= "no" KOKKOS_TRIBITS ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no" @@ -93,7 +93,9 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call 
kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) +# deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) +KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_desul_atomics) KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings) @@ -101,12 +103,18 @@ KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS), # Check for Kokkos Host Execution Spaces one of which must be on. KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) -KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread) +KOKKOS_INTERNAL_USE_THREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Threads) +# deprecated +KOKKOS_INTERNAL_USE_PTHREAD := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread) KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX) KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) +ifeq ($(KOKKOS_INTERNAL_USE_PTHREAD), 1) + KOKKOS_INTERNAL_USE_THREADS := 1 + $(warning Warning: Pthread is deprecated. Use Threads instead! 
KOKKOS_DEVICES=$(KOKKOS_DEVICES)) +endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) - ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) + ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 0) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) KOKKOS_INTERNAL_USE_SERIAL := 1 endif @@ -126,7 +134,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) KOKKOS_DEVICELIST += OpenMP endif -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) +ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) KOKKOS_DEVICELIST += Threads endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) @@ -472,7 +480,7 @@ ifneq ($(KOKKOS_CMAKE), yes) # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command KOKKOS_CXXLDFLAGS = -L$(shell pwd) endif -KOKKOS_LINK_FLAGS = +KOKKOS_LINK_FLAGS = KOKKOS_SRC = KOKKOS_HEADERS = @@ -504,7 +512,7 @@ tmp := $(call kokkos_append_header,'$H''endif') tmp := $(call kokkos_append_header,"") tmp := $(call kokkos_append_header,"$H""define KOKKOS_VERSION $(KOKKOS_VERSION)") tmp := $(call kokkos_append_header,"") - + tmp := $(call kokkos_append_header,"/* Execution Spaces */") ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) @@ -531,7 +539,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMP') endif -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) +ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_THREADS") endif @@ -952,18 +960,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 - KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - - else - # Assume that this is a really a GNU compiler on P8. 
- KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 - KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 - endif - endif + KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 + KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 endif endif @@ -973,18 +971,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 - KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - - else - # Assume that this is a really a GNU compiler on P9 - KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 - KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 - endif - endif + KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 + KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 endif endif @@ -1202,32 +1190,32 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) # Lets start with adding architecture defines ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 900") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 906") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 908") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx908 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 90A") tmp := $(call 
kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx90a endif KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) - ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) + ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp endif - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) @@ -1285,8 +1273,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG) endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) +ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS") +else + ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) + $(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) ) + endif endif KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) @@ -1327,7 +1319,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") - tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") @@ -1338,7 +1329,7 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif - ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 
1) + ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif @@ -1367,7 +1358,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) - ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) + ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) @@ -1419,7 +1410,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) KOKKOS_LINK_FLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) endif -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) +ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) KOKKOS_LIBS += -lpthread @@ -1453,14 +1444,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) KOKKOS_TPL_LIBRARY_NAMES += hpx endif -# Explicitly set the GCC Toolchain for Clang. -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) - KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) - KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) - KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -endif - # Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning. 
ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) @@ -1507,7 +1490,7 @@ libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) ar cr libkokkos.a $(KOKKOS_OBJ_LINK) ranlib libkokkos.a -print-cxx-flags: +print-cxx-flags: echo "$(KOKKOS_CXXFLAGS)" KOKKOS_LINK_DEPENDS=libkokkos.a diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index c097e80fec..a9cb12e1b4 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -10,6 +10,8 @@ Kokkos_Stacktrace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_S $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Stacktrace.cpp Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp +Kokkos_Command_Line_Parsing.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Command_Line_Parsing.cpp Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp @@ -72,7 +74,7 @@ Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_A $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp endif -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) +ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md index 673f462712..f6c500cc1a 100644 --- a/lib/kokkos/README.md +++ b/lib/kokkos/README.md @@ -48,14 +48,14 @@ For specifics see the LICENSE file contained in the repository or distribution. Generally Kokkos should work with all compiler versions newer than the minimum. However as in all sufficiently complex enough code, we have to work around compiler bugs with almost all compilers. So compiler versions we don't test may have issues -we are unaware off. +we are unaware of. * GCC: 5.3.0 * Clang: 4.0.0 * Intel: 17.0.1 * NVCC: 9.2.88 * NVC++: 21.5 -* ROCM: 4.3 +* ROCm: 4.3 * MSVC: 19.29 * IBM XL: 16.1.1 * Fujitsu: 4.5.0 @@ -70,7 +70,7 @@ we are unaware off. * MSVC: 19.29 * ARM/Clang: 20.1 * IBM XL: 16.1.1 -* ROCM: 4.3.0 +* ROCm: 4.3.0 ### Build system: @@ -80,7 +80,7 @@ we are unaware off. Primary tested compiler are passing in release mode with warnings as errors. They also are tested with a comprehensive set of -backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...). +backend combinations (i.e. OpenMP, Threads, Serial, OpenMP+Serial, ...). We are using the following set of flags: * GCC: ```` @@ -193,7 +193,7 @@ The main reason is that you may otherwise need many different configurations of Kokkos installed depending on the required compile time features an application needs. For example there is only one default execution space, which means you need different installations to have OpenMP -or Pthreads as the default space. Also for the CUDA backend there are certain +or C++ threads as the default space. Also for the CUDA backend there are certain choices, such as allowing relocatable device code, which must be made at installation time. 
Building Kokkos inline uses largely the same process as compiling an application against an installed Kokkos library. diff --git a/lib/kokkos/Spack.md b/lib/kokkos/Spack.md index 31a07deb56..79606c259d 100644 --- a/lib/kokkos/Spack.md +++ b/lib/kokkos/Spack.md @@ -24,20 +24,22 @@ By default, Spack doesn't 'see' anything on your system - including things like This can be limited by adding a `packages.yaml` to your `$HOME/.spack` folder that includes CMake (and CUDA, if applicable). For example, your `packages.yaml` file could be: ````yaml packages: - cuda: - modules: - cuda@10.1.243: [cuda/10.1.243] - paths: - cuda@10.1.243: - /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243 - buildable: false - cmake: - modules: - cmake: [cmake/3.16.8] - paths: - cmake: - /opt/local/ppc64le/cmake/3.16.8 - buildable: false + cuda: + buildable: false + externals: + - prefix: /opt/local/ppc64le-pwr8-nvidia/cuda/10.1.243 + spec: cuda@10.1.243 + - modules: + - cuda/10.1.243 + spec: cuda@10.1.243 + cmake: + buildable: false + externals: + - prefix: /opt/local/ppc64le/cmake/3.16.8 + spec: cmake@3.16.8 + - modules: + - cmake/3.16.8 + spec: cmake@3.16.8 ```` The `modules` entry is only necessary on systems that require loading Modules (i.e. most DOE systems). 
The `buildable` flag is useful to make sure Spack crashes if there is a path error, diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt index cf5564032c..4b60d887f7 100644 --- a/lib/kokkos/algorithms/src/CMakeLists.txt +++ b/lib/kokkos/algorithms/src/CMakeLists.txt @@ -10,6 +10,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) FILE(GLOB ALGO_HEADERS *.hpp) FILE(GLOB ALGO_SOURCES *.cpp) LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) +APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) INSTALL ( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index 46b8ab87fa..59c11afd9a 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -466,6 +466,25 @@ struct rand { }; #endif // defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT +#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT +template +struct rand { + using bhalf = Kokkos::Experimental::bhalf_t; + KOKKOS_INLINE_FUNCTION + static bhalf max() { return bhalf(1.0); } + KOKKOS_INLINE_FUNCTION + static bhalf draw(Generator& gen) { return bhalf(gen.frand()); } + KOKKOS_INLINE_FUNCTION + static bhalf draw(Generator& gen, const bhalf& range) { + return bhalf(gen.frand(float(range))); + } + KOKKOS_INLINE_FUNCTION + static bhalf draw(Generator& gen, const bhalf& start, const bhalf& end) { + return bhalf(gen.frand(float(start), float(end))); + } +}; +#endif // defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT + template struct rand { KOKKOS_INLINE_FUNCTION @@ -499,7 +518,7 @@ struct rand { }; template -struct rand > { +struct rand> { KOKKOS_INLINE_FUNCTION static Kokkos::complex max() { return Kokkos::complex(1.0, 1.0); @@ -528,7 +547,7 @@ struct rand > { }; template -struct rand > { +struct rand> { KOKKOS_INLINE_FUNCTION static Kokkos::complex 
max() { return Kokkos::complex(1.0, 1.0); @@ -617,24 +636,23 @@ struct Random_XorShift1024_UseCArrayState : std::false_type {}; #endif -template +template struct Random_UniqueIndex { - using locks_view_type = View; + using locks_view_type = View; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type) { -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - const int i = ExecutionSpace::impl_hardware_thread_id(); - return i; -#else - return 0; -#endif + KOKKOS_IF_ON_HOST( + (return DeviceType::execution_space::impl_hardware_thread_id();)) + + KOKKOS_IF_ON_DEVICE((return 0;)) } }; #ifdef KOKKOS_ENABLE_CUDA -template <> -struct Random_UniqueIndex { - using locks_view_type = View; +template +struct Random_UniqueIndex> { + using locks_view_type = + View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { #ifdef __CUDA_ARCH__ @@ -660,9 +678,11 @@ struct Random_UniqueIndex { #endif #ifdef KOKKOS_ENABLE_HIP -template <> -struct Random_UniqueIndex { - using locks_view_type = View; +template +struct Random_UniqueIndex< + Kokkos::Device> { + using locks_view_type = + View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { #ifdef __HIP_DEVICE_COMPILE__ @@ -688,18 +708,37 @@ struct Random_UniqueIndex { #endif #ifdef KOKKOS_ENABLE_SYCL -template <> -struct Random_UniqueIndex { - using locks_view_type = View; +template +struct Random_UniqueIndex< + Kokkos::Device> { + using locks_view_type = + View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { -#ifdef KOKKOS_ARCH_INTEL_GPU - int i = Kokkos::Impl::clock_tic() % locks_.extent(0); -#else - int i = 0; -#endif + auto item = sycl::ext::oneapi::experimental::this_nd_item<3>(); + std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1), + item.get_local_id(0)}; + std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1), + item.get_group(0)}; + std::size_t blockDim[3] = {item.get_local_range(2), item.get_local_range(1), + 
item.get_local_range(0)}; + std::size_t gridDim[3] = { + item.get_global_range(2) / item.get_local_range(2), + item.get_global_range(1) / item.get_local_range(1), + item.get_global_range(0) / item.get_local_range(0)}; + const int i_offset = + (threadIdx[0] * blockDim[1] + threadIdx[1]) * blockDim[2] + + threadIdx[2]; + int i = + (((blockIdx[0] * gridDim[1] + blockIdx[1]) * gridDim[2] + blockIdx[2]) * + blockDim[0] * blockDim[1] * blockDim[2] + + i_offset) % + locks_.extent(0); while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { - i = (i + 1) % static_cast(locks_.extent(0)); + i += blockDim[0] * blockDim[1] * blockDim[2]; + if (i >= static_cast(locks_.extent(0))) { + i = i_offset; + } } return i; } @@ -707,9 +746,12 @@ struct Random_UniqueIndex { #endif #ifdef KOKKOS_ENABLE_OPENMPTARGET -template <> -struct Random_UniqueIndex { - using locks_view_type = View; +template +struct Random_UniqueIndex< + Kokkos::Device> { + using locks_view_type = + View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks) { const int team_size = omp_get_num_threads(); @@ -873,10 +915,13 @@ class Random_XorShift64 { template class Random_XorShift64_Pool { + public: + using device_type = typename DeviceType::device_type; + private: - using execution_space = typename DeviceType::execution_space; - using locks_type = View; - using state_data_type = View; + using execution_space = typename device_type::execution_space; + using locks_type = View; + using state_data_type = View; locks_type locks_; state_data_type state_; int num_states_; @@ -884,7 +929,6 @@ class Random_XorShift64_Pool { public: using generator_type = Random_XorShift64; - using device_type = DeviceType; KOKKOS_INLINE_FUNCTION Random_XorShift64_Pool() { @@ -923,8 +967,10 @@ class Random_XorShift64_Pool { state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_, padding_); - typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename locks_type::HostMirror 
h_lock = create_mirror_view(locks_); + typename state_data_type::HostMirror h_state = + Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_); + typename locks_type::HostMirror h_lock = + Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_); // Execute on the HostMirror's default execution space. Random_XorShift64 @@ -947,8 +993,7 @@ class Random_XorShift64_Pool { KOKKOS_INLINE_FUNCTION Random_XorShift64 get_state() const { - const int i = - Impl::Random_UniqueIndex::get_state_idx(locks_); + const int i = Impl::Random_UniqueIndex::get_state_idx(locks_); return Random_XorShift64(state_(i, 0), i); } @@ -1119,11 +1164,14 @@ class Random_XorShift1024 { template class Random_XorShift1024_Pool { + public: + using device_type = typename DeviceType::device_type; + private: - using execution_space = typename DeviceType::execution_space; - using locks_type = View; - using int_view_type = View; - using state_data_type = View; + using execution_space = typename device_type::execution_space; + using locks_type = View; + using int_view_type = View; + using state_data_type = View; locks_type locks_; state_data_type state_; @@ -1135,8 +1183,6 @@ class Random_XorShift1024_Pool { public: using generator_type = Random_XorShift1024; - using device_type = DeviceType; - KOKKOS_INLINE_FUNCTION Random_XorShift1024_Pool() { num_states_ = 0; } @@ -1175,9 +1221,12 @@ class Random_XorShift1024_Pool { state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_); p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_, padding_); - typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename locks_type::HostMirror h_lock = create_mirror_view(locks_); - typename int_view_type::HostMirror h_p = create_mirror_view(p_); + typename state_data_type::HostMirror h_state = + Kokkos::create_mirror_view(Kokkos::WithoutInitializing, state_); + typename locks_type::HostMirror h_lock = + 
Kokkos::create_mirror_view(Kokkos::WithoutInitializing, locks_); + typename int_view_type::HostMirror h_p = + Kokkos::create_mirror_view(Kokkos::WithoutInitializing, p_); // Execute on the HostMirror's default execution space. Random_XorShift64 @@ -1203,8 +1252,7 @@ class Random_XorShift1024_Pool { KOKKOS_INLINE_FUNCTION Random_XorShift1024 get_state() const { - const int i = - Impl::Random_UniqueIndex::get_state_idx(locks_); + const int i = Impl::Random_UniqueIndex::get_state_idx(locks_); return Random_XorShift1024(state_, p_(i, 0), i); }; @@ -1224,265 +1272,34 @@ class Random_XorShift1024_Pool { namespace Impl { -template -struct fill_random_functor_range; template struct fill_random_functor_begin_end; template -struct fill_random_functor_range { +struct fill_random_functor_begin_end { using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type range; + typename ViewType::const_value_type begin, end; using Rand = rand; - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator()(const IndexType& i) const { + void operator()(IndexType) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if (idx < static_cast(a.extent(0))) - a(idx) = Rand::draw(gen, range); - } + a() = Rand::draw(gen, begin, end); rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range { - using execution_space = typename ViewType::execution_space; - ViewType a; - RandomPool rand_pool; - typename ViewType::const_value_type range; - - using Rand = rand; 
- - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(IndexType i) const { - typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if (idx < static_cast(a.extent(0))) { - for (IndexType k = 0; k < static_cast(a.extent(1)); k++) - a(idx, k) = Rand::draw(gen, range); - } - } - rand_pool.free_state(gen); - } -}; - -template -struct fill_random_functor_range { - using execution_space = typename ViewType::execution_space; - ViewType a; - RandomPool rand_pool; - typename ViewType::const_value_type range; - - using Rand = rand; - - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(IndexType i) const { - typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if (idx < static_cast(a.extent(0))) { - for (IndexType k = 0; k < static_cast(a.extent(1)); k++) - for (IndexType l = 0; l < static_cast(a.extent(2)); l++) - a(idx, k, l) = Rand::draw(gen, range); - } - } - rand_pool.free_state(gen); - } -}; - -template -struct fill_random_functor_range { - using execution_space = typename ViewType::execution_space; - ViewType a; - RandomPool rand_pool; - typename ViewType::const_value_type range; - - using Rand = rand; - - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(IndexType i) const { - typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if (idx 
< static_cast(a.extent(0))) { - for (IndexType k = 0; k < static_cast(a.extent(1)); k++) - for (IndexType l = 0; l < static_cast(a.extent(2)); l++) - for (IndexType m = 0; m < static_cast(a.extent(3)); m++) - a(idx, k, l, m) = Rand::draw(gen, range); - } - } - rand_pool.free_state(gen); - } -}; - -template -struct fill_random_functor_range { - using execution_space = typename ViewType::execution_space; - ViewType a; - RandomPool rand_pool; - typename ViewType::const_value_type range; - - using Rand = rand; - - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(IndexType i) const { - typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if (idx < static_cast(a.extent(0))) { - for (IndexType k = 0; k < static_cast(a.extent(1)); k++) - for (IndexType l = 0; l < static_cast(a.extent(2)); l++) - for (IndexType m = 0; m < static_cast(a.extent(3)); m++) - for (IndexType n = 0; n < static_cast(a.extent(4)); - n++) - a(idx, k, l, m, n) = Rand::draw(gen, range); - } - } - rand_pool.free_state(gen); - } -}; - -template -struct fill_random_functor_range { - using execution_space = typename ViewType::execution_space; - ViewType a; - RandomPool rand_pool; - typename ViewType::const_value_type range; - - using Rand = rand; - - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(IndexType i) const { - typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if (idx < static_cast(a.extent(0))) { - for (IndexType k = 0; k < static_cast(a.extent(1)); k++) - for (IndexType l = 0; l < 
static_cast(a.extent(2)); l++) - for (IndexType m = 0; m < static_cast(a.extent(3)); m++) - for (IndexType n = 0; n < static_cast(a.extent(4)); - n++) - for (IndexType o = 0; o < static_cast(a.extent(5)); - o++) - a(idx, k, l, m, n, o) = Rand::draw(gen, range); - } - } - rand_pool.free_state(gen); - } -}; - -template -struct fill_random_functor_range { - using execution_space = typename ViewType::execution_space; - ViewType a; - RandomPool rand_pool; - typename ViewType::const_value_type range; - - using Rand = rand; - - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(IndexType i) const { - typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if (idx < static_cast(a.extent(0))) { - for (IndexType k = 0; k < static_cast(a.extent(1)); k++) - for (IndexType l = 0; l < static_cast(a.extent(2)); l++) - for (IndexType m = 0; m < static_cast(a.extent(3)); m++) - for (IndexType n = 0; n < static_cast(a.extent(4)); - n++) - for (IndexType o = 0; o < static_cast(a.extent(5)); - o++) - for (IndexType p = 0; p < static_cast(a.extent(6)); - p++) - a(idx, k, l, m, n, o, p) = Rand::draw(gen, range); - } - } - rand_pool.free_state(gen); - } -}; - -template -struct fill_random_functor_range { - using execution_space = typename ViewType::execution_space; - ViewType a; - RandomPool rand_pool; - typename ViewType::const_value_type range; - - using Rand = rand; - - fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_) - : a(a_), rand_pool(rand_pool_), range(range_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(IndexType i) const { - typename RandomPool::generator_type gen = rand_pool.get_state(); - for (IndexType j = 0; j < loops; j++) { - const IndexType idx = i * loops + j; - if 
(idx < static_cast(a.extent(0))) { - for (IndexType k = 0; k < static_cast(a.extent(1)); k++) - for (IndexType l = 0; l < static_cast(a.extent(2)); l++) - for (IndexType m = 0; m < static_cast(a.extent(3)); m++) - for (IndexType n = 0; n < static_cast(a.extent(4)); - n++) - for (IndexType o = 0; o < static_cast(a.extent(5)); - o++) - for (IndexType p = 0; p < static_cast(a.extent(6)); - p++) - for (IndexType q = 0; - q < static_cast(a.extent(7)); q++) - a(idx, k, l, m, n, o, p, q) = Rand::draw(gen, range); - } - } - rand_pool.free_state(gen); - } -}; template struct fill_random_functor_begin_end { @@ -1752,19 +1569,6 @@ struct fill_random_functor_begin_end -void fill_random(ViewType a, RandomPool g, - typename ViewType::const_value_type range) { - int64_t LDA = a.extent(0); - if (LDA > 0) - parallel_for("Kokkos::fill_random", (LDA + 127) / 128, - Impl::fill_random_functor_range( - a, g, range)); -} - template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin, @@ -1776,6 +1580,23 @@ void fill_random(ViewType a, RandomPool g, ViewType::Rank, IndexType>( a, g, begin, end)); } + +} // namespace Impl + +template +void fill_random(ViewType a, RandomPool g, + typename ViewType::const_value_type begin, + typename ViewType::const_value_type end) { + Impl::apply_to_view_of_static_rank( + [&](auto dst) { Kokkos::Impl::fill_random(dst, g, begin, end); }, a); +} + +template +void fill_random(ViewType a, RandomPool g, + typename ViewType::const_value_type range) { + fill_random(a, g, 0, range); +} + } // namespace Kokkos #endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 9c2e8b978b..cde5e6857e 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -437,16 +437,41 @@ struct BinOp1D { BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max) : max_bins_(max_bins__ + 1), - mul_(1.0 * 
max_bins__ / (max - min)), + // Cast to int64_t to avoid possible overflow when using integer + mul_(std::is_integral::value + ? 1.0 * max_bins__ / (int64_t(max) - int64_t(min)) + : 1.0 * max_bins__ / (max - min)), range_(max - min), - min_(min) {} + min_(min) { + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + if (std::is_integral::value && + static_cast(range_) <= static_cast(max_bins__)) { + mul_ = 1.; + } + } // Determine bin index from key value - template + template < + class ViewType, + std::enable_if_t::value, + bool> = true> KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { return int(mul_ * (keys(i) - min_)); } + // Determine bin index from key value + template < + class ViewType, + std::enable_if_t::value, + bool> = true> + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + // The cast to int64_t is necessary because otherwise HIP returns the wrong + // result. + return int(mul_ * (int64_t(keys(i)) - int64_t(min_))); + } + // Return maximum bin index + 1 KOKKOS_INLINE_FUNCTION int max_bins() const { return max_bins_; } @@ -564,8 +589,26 @@ std::enable_if_t::value> sort( exec, 0, view.extent(0)), Impl::min_max_functor(view), reducer); if (result.min_val == result.max_val) return; + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + bool sort_in_bins = true; + // TODO: figure out better max_bins than this ...
+ int64_t max_bins = view.extent(0) / 2; + if (std::is_integral::value) { + // Cast to int64_t to avoid possible overflow when using integer + int64_t const max_val = result.max_val; + int64_t const min_val = result.min_val; + // using 10M as the cutoff for special behavior (roughly 40MB for the count + // array) + if ((max_val - min_val) < 10000000) { + max_bins = max_val - min_val + 1; + sort_in_bins = false; + } + } + BinSort bin_sort( - view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true); + view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); bin_sort.create_permute_vector(exec); bin_sort.sort(exec, view); } diff --git a/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp new file mode 100644 index 0000000000..2e3babbcf0 --- /dev/null +++ b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_HPP +#define KOKKOS_STD_ALGORITHMS_HPP + +/// \file Kokkos_StdAlgorithms.hpp +/// \brief Kokkos counterparts for Standard C++ Library algorithms + +#include +#include +#include + +// distance +#include + +// move, swap, iter_swap +#include "std_algorithms/Kokkos_ModifyingOperations.hpp" + +// find, find_if, find_if_not +// for_each, for_each_n +// mismatch +// equal +// count_if, count +// all_of, any_of, none_of +// adjacent_find +// lexicographical_compare +// search, search_n +// find_first_of, find_end +#include + +// replace, replace_copy_if, replace_copy, replace_if +// copy, copy_n, copy_backward, copy_if +// fill, fill_n +// transform +// generate, generate_n +// reverse, reverse_copy +// move, move_backward +// swap_ranges +// unique, unique_copy +// rotate, rotate_copy +// remove, remove_if, remove_copy, remove_copy_if +// shift_left, shift_right +#include + +// is_sorted_until, is_sorted +#include + +// min_element, max_element, minmax_element +#include + +// is_partitioned, partition_copy, partition_point +#include 
+ +// adjacent_difference +// reduce, transform_reduce +// exclusive_scan, transform_exclusive_scan +// inclusive_scan, transform_inclusive_scan +#include + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp new file mode 100644 index 0000000000..beb53fdd70 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp @@ -0,0 +1,105 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_BEGIN_END_HPP +#define KOKKOS_BEGIN_END_HPP + +#include +#include "Kokkos_RandomAccessIterator.hpp" +#include "Kokkos_Constraints.hpp" + +/// \file Kokkos_BeginEnd.hpp +/// \brief Kokkos begin, end, cbegin, cend + +namespace Kokkos { +namespace Experimental { + +template +KOKKOS_INLINE_FUNCTION auto begin( + const Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + using it_t = + Impl::RandomAccessIterator>; + return it_t(v); +} + +template +KOKKOS_INLINE_FUNCTION auto end( + const Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + using it_t = + Impl::RandomAccessIterator>; + return it_t(v, v.extent(0)); +} + +template +KOKKOS_INLINE_FUNCTION auto cbegin( + const Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + using ViewConstType = + typename Kokkos::View::const_type; + const ViewConstType cv = v; + using it_t = Impl::RandomAccessIterator; + return it_t(cv); +} + +template +KOKKOS_INLINE_FUNCTION auto cend( + const Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + using ViewConstType = + typename Kokkos::View::const_type; + const ViewConstType cv = v; + using it_t = Impl::RandomAccessIterator; + return it_t(cv, 
cv.extent(0)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp new file mode 100644 index 0000000000..ec54cd1900 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Constraints.hpp @@ -0,0 +1,237 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_ +#define KOKKOS_STD_ALGORITHMS_CONSTRAINTS_HPP_ + +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template +struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; + +template +struct is_admissible_to_kokkos_std_algorithms< + T, std::enable_if_t< ::Kokkos::is_view::value && T::rank == 1 && + (std::is_same::value || + std::is_same::value || + std::is_same::value)> > + : std::true_type {}; + +template +KOKKOS_INLINE_FUNCTION constexpr void +static_assert_is_admissible_to_kokkos_std_algorithms( + const ViewType& /* view */) { + static_assert(is_admissible_to_kokkos_std_algorithms::value, + "Currently, Kokkos standard algorithms only accept 1D Views."); +} + +// +// is_iterator +// +template +using iterator_category_t = typename T::iterator_category; + +template +using is_iterator = Kokkos::is_detected; + +// +// are_iterators +// +template +struct are_iterators; + +template +struct are_iterators { + static constexpr bool value = is_iterator::value; +}; + +template +struct are_iterators { + static constexpr bool value = + are_iterators::value && are_iterators::value; +}; + +// +// are_random_access_iterators +// +template +struct are_random_access_iterators; + +template +struct 
are_random_access_iterators { + static constexpr bool value = + is_iterator::value && + std::is_base_of::value; +}; + +template +struct are_random_access_iterators { + static constexpr bool value = are_random_access_iterators::value && + are_random_access_iterators::value; +}; + +// +// iterators_are_accessible_from +// +template +struct iterators_are_accessible_from; + +template +struct iterators_are_accessible_from { + using view_type = typename IteratorType::view_type; + static constexpr bool value = + SpaceAccessibility::accessible; +}; + +template +struct iterators_are_accessible_from { + static constexpr bool value = + iterators_are_accessible_from::value && + iterators_are_accessible_from::value; +}; + +template +KOKKOS_INLINE_FUNCTION constexpr void +static_assert_random_access_and_accessible(const ExecutionSpace& /* ex */, + IteratorTypes... /* iterators */) { + static_assert( + are_random_access_iterators::value, + "Currently, Kokkos standard algorithms require random access iterators."); + static_assert( + iterators_are_accessible_from::value, + "Incompatible view/iterator and execution space"); +} + +// +// have matching difference_type +// +template +struct iterators_have_matching_difference_type; + +template +struct iterators_have_matching_difference_type { + static constexpr bool value = true; +}; + +template +struct iterators_have_matching_difference_type { + static constexpr bool value = + std::is_same::value; +}; + +template +struct iterators_have_matching_difference_type { + static constexpr bool value = + iterators_have_matching_difference_type::value && + iterators_have_matching_difference_type::value; +}; + +template +KOKKOS_INLINE_FUNCTION constexpr void +static_assert_iterators_have_matching_difference_type(IteratorType1 /* it1 */, + IteratorType2 /* it2 */) { + static_assert(iterators_have_matching_difference_type::value, + "Iterators do not have matching difference_type"); +} + +template +KOKKOS_INLINE_FUNCTION constexpr void 
+static_assert_iterators_have_matching_difference_type(IteratorType1 it1, + IteratorType2 it2, + IteratorType3 it3) { + static_assert_iterators_have_matching_difference_type(it1, it2); + static_assert_iterators_have_matching_difference_type(it2, it3); +} + +// +// not_openmptarget +// +template +struct not_openmptarget { +#ifndef KOKKOS_ENABLE_OPENMPTARGET + static constexpr bool value = true; +#else + static constexpr bool value = + !std::is_same, + ::Kokkos::Experimental::OpenMPTarget>::value; +#endif +}; + +template +KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( + const ExecutionSpace&) { + static_assert(not_openmptarget::value, + "Currently, Kokkos standard algorithms do not support custom " + "comparators in OpenMPTarget"); +} + +// +// valid range +// +template +void expect_valid_range(IteratorType first, IteratorType last) { + // this is a no-op for release + KOKKOS_EXPECTS(last >= first); + // avoid compiler complaining when KOKKOS_EXPECTS is no-op + (void)first; + (void)last; +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp new file mode 100644 index 0000000000..ced4370472 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp @@ -0,0 +1,69 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP +#define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP + +#include "Kokkos_Constraints.hpp" +#include "Kokkos_RandomAccessIterator.hpp" + +namespace Kokkos { +namespace Experimental { + +template +KOKKOS_INLINE_FUNCTION constexpr typename IteratorType::difference_type +distance(IteratorType first, IteratorType last) { + static_assert( + ::Kokkos::Experimental::Impl::are_random_access_iterators< + IteratorType>::value, + "Kokkos::Experimental::distance: only implemented for random access " + "iterators."); + + return last - first; +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp new file mode 100644 index 0000000000..18d5dadd53 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_HelperPredicates.hpp @@ -0,0 +1,120 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_HELPER_PREDICATES_HPP +#define KOKKOS_STD_HELPER_PREDICATES_HPP + +#include + +// naming convention: +// StdAlgoSomeExpressiveNameUnaryPredicate +// StdAlgoSomeExpressiveNameBinaryPredicate + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// ------------------ +// UNARY PREDICATES +// ------------------ +template +struct StdAlgoEqualsValUnaryPredicate { + T m_value; + + KOKKOS_FUNCTION + constexpr bool operator()(const T& val) const { return val == m_value; } + + KOKKOS_FUNCTION + constexpr explicit StdAlgoEqualsValUnaryPredicate(const T& _value) + : m_value(_value) {} +}; + +template +struct StdAlgoNotEqualsValUnaryPredicate { + T m_value; + + KOKKOS_FUNCTION + constexpr bool operator()(const T& val) const { return !(val == m_value); } + + KOKKOS_FUNCTION + constexpr explicit StdAlgoNotEqualsValUnaryPredicate(const T& _value) + :
m_value(_value) {} +}; + +template +struct StdAlgoNegateUnaryPredicateWrapper { + PredicateType m_pred; + + KOKKOS_FUNCTION + constexpr bool operator()(const ValueType& val) const { return !m_pred(val); } + + KOKKOS_FUNCTION + constexpr explicit StdAlgoNegateUnaryPredicateWrapper( + const PredicateType& pred) + : m_pred(pred) {} +}; + +// ------------------ +// BINARY PREDICATES +// ------------------ +template +struct StdAlgoEqualBinaryPredicate { + KOKKOS_FUNCTION + constexpr bool operator()(const ValueType1& a, const ValueType2& b) const { + return a == b; + } +}; + +template +struct StdAlgoLessThanBinaryPredicate { + KOKKOS_FUNCTION + constexpr bool operator()(const ValueType1& a, const ValueType2& b) const { + return a < b; + } +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp new file mode 100644 index 0000000000..aa8f5ba376 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp @@ -0,0 +1,409 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP +#define KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP + +#include // NOTE(review): the include target appears to be missing here - confirm against upstream Kokkos +#include "Kokkos_BeginEnd.hpp" +#include "Kokkos_Constraints.hpp" +#include "Kokkos_Distance.hpp" +#include "Kokkos_ModifyingOperations.hpp" + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template // NOTE(review): the template parameter list declaring IteratorType and ReducerType appears to be missing here and after the other bare template keywords below - confirm against upstream Kokkos +struct StdMinOrMaxElemFunctor { // reduction functor: each call joins one entry built from element i and index i + using index_type = typename IteratorType::difference_type; + using red_value_type = typename ReducerType::value_type; + + IteratorType m_first; + ReducerType m_reducer; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& red_value) const { + m_reducer.join(red_value, red_value_type{m_first[i], i}); // folds the pair {element at i, i} into the running value via the reducer's join + } + + KOKKOS_FUNCTION + StdMinOrMaxElemFunctor(IteratorType first, ReducerType reducer) + : m_first(std::move(first)), m_reducer(std::move(reducer)) {} +}; + +template +struct StdMinMaxElemFunctor { // reduction functor: each call joins one four-field entry built from element i and index i + using index_type = typename IteratorType::difference_type; + using red_value_type = typename ReducerType::value_type; + IteratorType m_first; + ReducerType m_reducer; + + KOKKOS_FUNCTION + void operator()(const index_type i, red_value_type& red_value) const { + const auto& my_value = m_first[i]; + m_reducer.join(red_value, red_value_type{my_value, my_value, i, i}); // same element and same index passed twice; presumably the min and max candidates - confirm against ReducerType::value_type + } + + KOKKOS_FUNCTION + StdMinMaxElemFunctor(IteratorType first, ReducerType reducer) + : m_first(std::move(first)), m_reducer(std::move(reducer)) {} +}; + +// ------------------------------------------ +// min_or_max_element_impl +// ------------------------------------------ +template