From b7b9a4a599dfa200e79b3f85c7da0b35e81c6330 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 28 Mar 2025 15:29:14 -0600 Subject: [PATCH] Update Kokkos library in LAMMPS to v4.6.0 --- lib/kokkos/CHANGELOG.md | 67 + lib/kokkos/CMakeLists.txt | 4 +- lib/kokkos/CTestConfig.cmake | 4 + lib/kokkos/Makefile.kokkos | 44 +- lib/kokkos/README.md | 12 +- lib/kokkos/algorithms/CMakeLists.txt | 4 + .../algorithms/perf_test/CMakeLists.txt | 63 + .../perf_test/test_inclusive_scan.cpp | 191 + lib/kokkos/algorithms/src/Kokkos_Random.hpp | 7 + .../src/sorting/Kokkos_BinOpsPublicAPI.hpp | 2 +- .../src/sorting/Kokkos_SortPublicAPI.hpp | 20 +- .../src/sorting/impl/Kokkos_SortByKeyImpl.hpp | 64 +- .../src/sorting/impl/Kokkos_SortImpl.hpp | 64 +- .../impl/Kokkos_Constraints.hpp | 13 +- .../src/std_algorithms/impl/Kokkos_CopyIf.hpp | 5 +- .../impl/Kokkos_ExclusiveScan.hpp | 4 +- .../Kokkos_IdentityReferenceUnaryFunctor.hpp | 7 +- .../impl/Kokkos_InclusiveScan.hpp | 160 +- .../impl/Kokkos_RandomAccessIterator.hpp | 137 +- .../src/std_algorithms/impl/Kokkos_Unique.hpp | 8 +- .../std_algorithms/impl/Kokkos_UniqueCopy.hpp | 5 +- lib/kokkos/algorithms/unit_tests/Makefile | 2 + .../algorithms/unit_tests/TestRandom.hpp | 10 +- .../unit_tests/TestRandomAccessIterator.cpp | 18 +- lib/kokkos/algorithms/unit_tests/TestSort.hpp | 1 + .../unit_tests/TestStdAlgorithmsCommon.hpp | 5 +- .../TestStdAlgorithmsCompileOnly.cpp | 14 + .../TestStdAlgorithmsConstraints.cpp | 1 + .../TestStdAlgorithmsExclusiveScan.cpp | 2 +- .../TestStdAlgorithmsInclusiveScan.cpp | 2 +- .../TestStdAlgorithmsIsSortedUntil.cpp | 11 +- .../unit_tests/TestStdAlgorithmsModOps.cpp | 6 +- .../unit_tests/TestStdAlgorithmsNumerics.cpp | 55 +- .../unit_tests/TestStdAlgorithmsRotate.cpp | 2 +- ...estStdAlgorithmsTeamAdjacentDifference.cpp | 1 + .../TestStdAlgorithmsTeamAdjacentFind.cpp | 1 + .../unit_tests/TestStdAlgorithmsTeamEqual.cpp | 1 + .../TestStdAlgorithmsTeamExclusiveScan.cpp | 1 + .../TestStdAlgorithmsTeamFindEnd.cpp | 1 + 
.../TestStdAlgorithmsTeamFindFirstOf.cpp | 1 + .../TestStdAlgorithmsTeamFindIf.cpp | 12 - .../TestStdAlgorithmsTeamFindIfNot.cpp | 12 - .../TestStdAlgorithmsTeamInclusiveScan.cpp | 1 + ...tdAlgorithmsTeamLexicographicalCompare.cpp | 1 + .../TestStdAlgorithmsTeamMismatch.cpp | 1 + .../TestStdAlgorithmsTeamReduce.cpp | 1 + .../TestStdAlgorithmsTeamSearch.cpp | 1 + .../TestStdAlgorithmsTeamSearchN.cpp | 1 + ...tdAlgorithmsTeamTransformExclusiveScan.cpp | 1 + ...tdAlgorithmsTeamTransformInclusiveScan.cpp | 1 + .../TestStdAlgorithmsTeamTransformReduce.cpp | 1 + ...estStdAlgorithmsTransformExclusiveScan.cpp | 3 +- ...estStdAlgorithmsTransformInclusiveScan.cpp | 3 +- lib/kokkos/benchmarks/atomic/Makefile | 1 + .../benchmarks/bytes_and_flops/Makefile | 1 + lib/kokkos/benchmarks/gather/Makefile | 1 + .../launch_latency/launch_latency.cpp | 7 +- .../benchmarks/policy_performance/Makefile | 1 + .../benchmarks/policy_performance/main.cpp | 7 +- .../policy_performance/policy_perf_test.hpp | 7 +- lib/kokkos/benchmarks/stream/Makefile | 1 + .../benchmarks/view_copy_constructor/Makefile | 1 + lib/kokkos/bin/nvcc_wrapper | 40 +- lib/kokkos/cmake/KokkosConfig.cmake.in | 90 +- lib/kokkos/cmake/KokkosConfigCommon.cmake.in | 384 +- lib/kokkos/cmake/KokkosCore_config.h.in | 5 +- lib/kokkos/cmake/Modules/FindTPLCUDA.cmake | 13 +- lib/kokkos/cmake/intel.cmake | 15 - lib/kokkos/cmake/kokkos_arch.cmake | 74 +- lib/kokkos/cmake/kokkos_compiler_id.cmake | 8 +- lib/kokkos/cmake/kokkos_enable_options.cmake | 2 +- lib/kokkos/cmake/kokkos_functions.cmake | 1 - lib/kokkos/cmake/kokkos_test_cxx_std.cmake | 3 - lib/kokkos/cmake/kokkos_tribits.cmake | 1 - .../containers/performance_tests/Makefile | 2 + lib/kokkos/containers/src/Kokkos_Bitset.hpp | 51 +- lib/kokkos/containers/src/Kokkos_DualView.hpp | 38 +- .../containers/src/Kokkos_DynRankView.hpp | 77 +- .../containers/src/Kokkos_DynamicView.hpp | 39 +- .../containers/src/Kokkos_ErrorReporter.hpp | 21 +- .../containers/src/Kokkos_OffsetView.hpp | 14 
+- .../containers/src/Kokkos_ScatterView.hpp | 18 +- .../containers/src/Kokkos_StaticCrsGraph.hpp | 17 + .../containers/src/Kokkos_UnorderedMap.hpp | 44 +- lib/kokkos/containers/src/Kokkos_Vector.hpp | 55 +- .../src/impl/Kokkos_Functional_impl.hpp | 3 + .../containers/unit_tests/CMakeLists.txt | 4 +- lib/kokkos/containers/unit_tests/Makefile | 9 +- .../containers/unit_tests/TestDualView.hpp | 164 +- .../TestDynRankView_TeamScratch.hpp | 8 +- .../containers/unit_tests/TestDynViewAPI.hpp | 26 +- .../containers/unit_tests/TestOffsetView.hpp | 63 +- .../containers/unit_tests/TestScatterView.hpp | 6 +- .../unit_tests/TestStaticCrsGraph.hpp | 2 + lib/kokkos/core/perf_test/BenchmarkMain.cpp | 1 + lib/kokkos/core/perf_test/CMakeLists.txt | 1 + lib/kokkos/core/perf_test/Makefile | 2 + .../perf_test/PerfTest_CustomReduction.cpp | 4 +- .../core/perf_test/PerfTest_ViewCopy.hpp | 32 +- .../core/perf_test/PerfTest_ViewCopy_a123.cpp | 31 + .../core/perf_test/PerfTest_ViewFill.hpp | 20 + .../core/perf_test/PerfTest_ViewFill_123.cpp | 2 + lib/kokkos/core/perf_test/test_atomic.cpp | 3 + lib/kokkos/core/perf_test/test_reduction.cpp | 121 + lib/kokkos/core/src/CMakeLists.txt | 4 + lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp | 6 +- .../src/Cuda/Kokkos_Cuda_GraphNodeKernel.hpp | 10 +- .../core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp | 52 +- .../src/Cuda/Kokkos_Cuda_Half_Conversion.hpp | 144 +- .../core/src/Cuda/Kokkos_Cuda_Instance.cpp | 56 +- .../core/src/Cuda/Kokkos_Cuda_Instance.hpp | 24 +- .../src/Cuda/Kokkos_Cuda_KernelLaunch.hpp | 23 +- .../src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp | 15 +- .../src/Cuda/Kokkos_Cuda_Parallel_Range.hpp | 57 +- .../src/Cuda/Kokkos_Cuda_Parallel_Team.hpp | 52 +- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 8 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 28 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 29 +- .../src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp | 4 +- .../core/src/Cuda/Kokkos_Cuda_abort.hpp | 1 + lib/kokkos/core/src/HIP/Kokkos_HIP.cpp 
| 51 +- .../HIP/Kokkos_HIP_BlockSize_Deduction.hpp | 82 +- .../core/src/HIP/Kokkos_HIP_DeepCopy.cpp | 4 +- .../core/src/HIP/Kokkos_HIP_DeepCopy.hpp | 6 +- .../src/HIP/Kokkos_HIP_GraphNodeKernel.hpp | 10 +- .../core/src/HIP/Kokkos_HIP_Graph_Impl.hpp | 92 +- .../src/HIP/Kokkos_HIP_Half_Conversion.hpp | 28 +- .../core/src/HIP/Kokkos_HIP_Instance.cpp | 113 +- .../core/src/HIP/Kokkos_HIP_Instance.hpp | 153 +- .../core/src/HIP/Kokkos_HIP_IsXnack.cpp | 92 + .../core/src/HIP/Kokkos_HIP_IsXnack.hpp | 63 + .../core/src/HIP/Kokkos_HIP_KernelLaunch.hpp | 136 +- .../src/HIP/Kokkos_HIP_ParallelFor_Range.hpp | 30 +- .../src/HIP/Kokkos_HIP_ParallelFor_Team.hpp | 12 +- .../HIP/Kokkos_HIP_ParallelReduce_Range.hpp | 23 +- .../HIP/Kokkos_HIP_ParallelReduce_Team.hpp | 4 +- .../src/HIP/Kokkos_HIP_ParallelScan_Range.hpp | 4 +- .../core/src/HIP/Kokkos_HIP_ReduceScan.hpp | 6 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Space.cpp | 71 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Space.hpp | 44 +- lib/kokkos/core/src/HIP/Kokkos_HIP_Team.hpp | 2 +- .../src/HIP/Kokkos_HIP_TeamPolicyInternal.hpp | 41 +- .../src/HIP/Kokkos_HIP_WorkGraphPolicy.hpp | 4 +- .../core/src/HIP/Kokkos_HIP_ZeroMemset.cpp | 3 +- .../core/src/HIP/Kokkos_HIP_ZeroMemset.hpp | 12 +- lib/kokkos/core/src/HPX/Kokkos_HPX.hpp | 2 +- lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp | 4 +- lib/kokkos/core/src/Kokkos_Array.hpp | 42 +- .../core/src/Kokkos_BitManipulation.hpp | 2 +- lib/kokkos/core/src/Kokkos_Complex.hpp | 20 +- lib/kokkos/core/src/Kokkos_CopyViews.hpp | 872 +--- lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 8 - lib/kokkos/core/src/Kokkos_ExecPolicy.hpp | 26 +- lib/kokkos/core/src/Kokkos_Future.hpp | 89 +- lib/kokkos/core/src/Kokkos_Graph.hpp | 11 + lib/kokkos/core/src/Kokkos_GraphNode.hpp | 61 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 27 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 19 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 34 +- lib/kokkos/core/src/Kokkos_TaskScheduler.hpp | 5 +- lib/kokkos/core/src/Kokkos_Tuners.hpp | 4 + 
lib/kokkos/core/src/Kokkos_TypeInfo.hpp | 11 +- lib/kokkos/core/src/Kokkos_View.hpp | 2 +- .../src/OpenMP/Kokkos_OpenMP_Instance.cpp | 31 +- .../src/OpenMP/Kokkos_OpenMP_Instance.hpp | 6 +- .../src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp | 42 +- .../OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp | 20 +- .../OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp | 8 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 33 +- .../OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp | 4 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp | 16 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL.hpp | 9 +- .../core/src/SYCL/Kokkos_SYCL_DeepCopy.hpp | 6 +- .../src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp | 10 +- .../core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp | 48 +- .../src/SYCL/Kokkos_SYCL_Half_Conversion.hpp | 108 +- .../core/src/SYCL/Kokkos_SYCL_Instance.cpp | 3 + .../core/src/SYCL/Kokkos_SYCL_Instance.hpp | 18 +- .../src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp | 5 +- .../Kokkos_SYCL_ParallelReduce_MDRange.hpp | 6 +- .../SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp | 5 +- .../SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp | 5 +- .../SYCL/Kokkos_SYCL_ParallelScan_Range.hpp | 7 +- .../core/src/SYCL/Kokkos_SYCL_Space.cpp | 26 +- lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp | 2 +- .../core/src/SYCL/Kokkos_SYCL_TeamPolicy.hpp | 27 +- .../SYCL/Kokkos_SYCL_WorkgroupReduction.hpp | 6 +- .../core/src/SYCL/Kokkos_SYCL_ZeroMemset.hpp | 7 +- lib/kokkos/core/src/Serial/Kokkos_Serial.hpp | 58 +- .../core/src/Threads/Kokkos_Threads_Team.hpp | 9 +- lib/kokkos/core/src/View/Kokkos_BasicView.hpp | 197 +- .../Kokkos_ViewAccessPreconditionsCheck.hpp | 160 + .../core/src/View/Kokkos_ViewCommonType.hpp | 128 + lib/kokkos/core/src/View/Kokkos_ViewCtor.hpp | 21 +- .../core/src/View/Kokkos_ViewLegacy.hpp | 129 +- .../core/src/View/Kokkos_ViewMapping.hpp | 12 +- .../View/MDSpan/Kokkos_MDSpan_Accessor.hpp | 27 +- .../src/View/MDSpan/Kokkos_MDSpan_Layout.hpp | 134 +- .../src/impl/KokkosExp_Host_IterateTile.hpp | 6 +- .../src/impl/KokkosExp_IterateTileGPU.hpp | 215 +- 
lib/kokkos/core/src/impl/Kokkos_BitOps.hpp | 34 +- lib/kokkos/core/src/impl/Kokkos_Core.cpp | 32 +- .../impl/Kokkos_Default_GraphNodeKernel.hpp | 8 +- .../impl/Kokkos_Default_GraphNode_Impl.hpp | 6 +- .../src/impl/Kokkos_Default_Graph_Impl.hpp | 12 +- .../src/impl/Kokkos_Default_Graph_fwd.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_EBO.hpp | 2 + .../core/src/impl/Kokkos_FunctorAnalysis.hpp | 37 - lib/kokkos/core/src/impl/Kokkos_GraphImpl.hpp | 11 + .../core/src/impl/Kokkos_GraphImpl_fwd.hpp | 3 + .../src/impl/Kokkos_GraphNodeThenImpl.hpp | 58 + .../impl/Kokkos_Half_FloatingPointWrapper.hpp | 96 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 4 - .../impl/Kokkos_InitializationSettings.hpp | 32 +- lib/kokkos/core/src/impl/Kokkos_LIFO.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_Profiling.cpp | 61 +- .../core/src/impl/Kokkos_SharedAlloc.hpp | 77 +- .../src/impl/Kokkos_SharedAlloc_timpl.hpp | 44 +- .../src/impl/Kokkos_SimpleTaskScheduler.hpp | 22 +- .../core/src/impl/Kokkos_Stacktrace.cpp | 1 + .../src/impl/Kokkos_StringManipulation.hpp | 4 +- .../core/src/impl/Kokkos_TaskQueueCommon.hpp | 8 +- .../impl/Kokkos_TaskQueueMemoryManager.hpp | 4 +- .../core/src/impl/Kokkos_TeamMDPolicy.hpp | 3 + lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 114 - lib/kokkos/core/src/impl/Kokkos_Utilities.hpp | 4 + .../core/src/impl/Kokkos_VLAEmulation.hpp | 7 +- .../core/src/setup/Kokkos_Setup_HIP.hpp | 6 + .../src/traits/Kokkos_PolicyTraitAdaptor.hpp | 4 + lib/kokkos/core/unit_test/CMakeLists.txt | 67 +- .../core/unit_test/IncrementalTest.cpp.in | 7 - ...ionEnvironmentNeverInitializedFixture.hpp} | 30 +- lib/kokkos/core/unit_test/Makefile | 11 +- lib/kokkos/core/unit_test/TestAbort.hpp | 2 +- lib/kokkos/core/unit_test/TestArray.cpp | 6 +- .../core/unit_test/TestAtomicOperations.hpp | 3 + .../unit_test/TestAtomicOperations_double.hpp | 2 +- .../unit_test/TestAtomicOperations_float.hpp | 2 +- lib/kokkos/core/unit_test/TestAtomics.hpp | 5 + lib/kokkos/core/unit_test/TestCXX11.hpp | 2 + 
.../core/unit_test/TestCompilerMacros.cpp | 3 +- lib/kokkos/core/unit_test/TestComplex.hpp | 41 +- .../core/unit_test/TestDetectionIdiom.cpp | 2 + .../unit_test/TestExecSpacePartitioning.hpp | 3 +- .../unit_test/TestExecSpaceThreadSafety.hpp | 6 - ...onEnvironmentNonInitializedOrFinalized.cpp | 143 + lib/kokkos/core/unit_test/TestGraph.hpp | 187 +- .../core/unit_test/TestGraphAtomicLocks.hpp | 79 + .../core/unit_test/TestHalfConversion.hpp | 18 +- .../core/unit_test/TestHalfOperators.hpp | 52 +- .../core/unit_test/TestHostSharedPtr.hpp | 6 +- .../TestHostSharedPtrAccessOnDevice.hpp | 6 +- .../core/unit_test/TestInitializeFinalize.cpp | 115 + .../core/unit_test/TestIrregularLayout.hpp | 1 + .../unit_test/TestLegionInitialization.cpp | 69 +- lib/kokkos/core/unit_test/TestMDRange.hpp | 313 +- lib/kokkos/core/unit_test/TestMDRange_a.hpp | 8 + lib/kokkos/core/unit_test/TestMDRange_b.hpp | 11 +- lib/kokkos/core/unit_test/TestMDRange_e.hpp | 7 + .../unit_test/TestMathematicalFunctions.hpp | 33 +- .../TestMathematicalSpecialFunctions.hpp | 25 +- lib/kokkos/core/unit_test/TestMultiGPU.hpp | 13 + .../unit_test/TestNonTrivialScalarTypes.hpp | 5 + .../core/unit_test/TestNumericTraits.hpp | 2 + .../core/unit_test/TestPushFinalizeHook.cpp | 114 + lib/kokkos/core/unit_test/TestRange.hpp | 71 +- ...Test_ScopeGuard.cpp => TestScopeGuard.cpp} | 41 +- lib/kokkos/core/unit_test/TestSharedSpace.cpp | 6 +- .../TestSpaceAwareAccessorAccessViolation.hpp | 2 +- .../core/unit_test/TestStringManipulation.cpp | 6 +- .../unit_test/TestTaskScheduler_single.hpp | 4 +- lib/kokkos/core/unit_test/TestTeam.hpp | 4 +- lib/kokkos/core/unit_test/TestTeamBasic.hpp | 8 +- lib/kokkos/core/unit_test/TestTeamScan.hpp | 6 +- lib/kokkos/core/unit_test/TestTeamVector.hpp | 11 +- lib/kokkos/core/unit_test/TestViewAPI.hpp | 24 +- lib/kokkos/core/unit_test/TestViewAPI_e.hpp | 18 +- .../core/unit_test/TestViewBadAlloc.hpp | 6 +- lib/kokkos/core/unit_test/TestViewCopy_a.hpp | 12 + 
lib/kokkos/core/unit_test/TestViewCopy_b.hpp | 12 + .../core/unit_test/TestViewCtorProp.hpp | 14 +- .../TestViewEmptyRuntimeUnmanaged.hpp | 6 - .../core/unit_test/TestViewMapping_a.hpp | 133 +- .../TestViewMemoryAccessViolation.hpp | 8 +- .../unit_test/TestViewOutOfBoundsAccess.hpp | 4 +- lib/kokkos/core/unit_test/TestViewSubview.hpp | 28 +- lib/kokkos/core/unit_test/TestView_64bit.hpp | 2 +- .../unit_test/TestWithoutInitializing.hpp | 6 +- lib/kokkos/core/unit_test/UnitTestMain.cpp | 1 - .../core/unit_test/UnitTestMainInit.cpp | 1 - .../unit_test/UnitTest_PushFinalizeHook.cpp | 102 - .../UnitTest_PushFinalizeHook_terminate.cpp | 57 - .../configuration/test-code/Makefile | 1 + .../unit_test/cuda/TestCuda_InterOp_Graph.cpp | 30 +- .../cuda/TestCuda_InterOp_StreamsMultiGPU.cpp | 69 +- .../core/unit_test/cuda/TestCuda_Spaces.cpp | 42 +- .../default/TestDefaultDeviceTypeViewAPI.cpp | 84 +- .../headers_self_contained/CMakeLists.txt | 5 +- .../hip/TestHIP_BlocksizeDeduction.cpp | 8 +- .../unit_test/hip/TestHIP_InterOp_Graph.cpp | 35 + .../hip/TestHIP_InterOp_StreamsMultiGPU.cpp | 153 + .../core/unit_test/hip/TestHIP_ScanUnit.cpp | 4 +- .../core/unit_test/hip/TestHIP_Spaces.cpp | 24 +- .../hip/TestHIP_UnifiedMemory_ZeroMemset.cpp | 27 +- .../core/unit_test/hpx/TestHPX_InParallel.cpp | 8 + .../incremental/Test01_execspace.hpp | 12 +- lib/kokkos/core/unit_test/standalone/Makefile | 2 + .../unit_test/sycl/TestSYCL_InterOp_Graph.cpp | 40 +- .../core/unit_test/sycl/TestSYCL_Spaces.cpp | 26 +- .../tools/TestWithoutInitializing.cpp | 4 +- .../tools/include/ToolTestingUtilities.hpp | 30 +- .../core/unit_test/view/TestBasicView.hpp | 16 +- .../view/TestBasicViewMDSpanConversion.cpp | 36 +- .../build_cmake_installed/CMakeLists.txt | 4 +- .../CMakeLists.txt | 15 +- lib/kokkos/example/make_buildlink/Makefile | 1 + lib/kokkos/example/query_device/Makefile | 3 + .../example/relocatable_function/Makefile | 2 + .../example/tutorial/01_hello_world/Makefile | 2 + 
.../tutorial/01_hello_world_lambda/Makefile | 2 + .../tutorial/02_simple_reduce/Makefile | 2 + .../tutorial/02_simple_reduce_lambda/Makefile | 1 + .../example/tutorial/03_simple_view/Makefile | 1 + .../tutorial/03_simple_view_lambda/Makefile | 1 + .../tutorial/04_simple_memoryspaces/Makefile | 1 + .../tutorial/05_simple_atomics/Makefile | 1 + .../tutorial/06_simple_mdrangepolicy/Makefile | 1 + .../Advanced_Views/01_data_layouts/Makefile | 1 + .../Advanced_Views/02_memory_traits/Makefile | 1 + .../Advanced_Views/03_subviews/Makefile | 1 + .../Advanced_Views/04_dualviews/Makefile | 1 + .../Advanced_Views/04_dualviews/dual_view.cpp | 4 +- .../Advanced_Views/05_NVIDIA_UVM/Makefile | 2 + .../Advanced_Views/06_AtomicViews/Makefile | 1 + .../07_Overlapping_DeepCopy/Makefile | 2 + .../Algorithms/01_random_numbers/Makefile | 1 + .../01_random_numbers/random_numbers.cpp | 10 +- .../01_thread_teams/Makefile | 1 + .../01_thread_teams_lambda/Makefile | 1 + .../02_nested_parallel_for/Makefile | 1 + .../03_vectorization/Makefile | 1 + .../04_team_scan/Makefile | 1 + .../example/tutorial/launch_bounds/Makefile | 1 + lib/kokkos/example/virtual_functions/Makefile | 2 + lib/kokkos/generate_makefile.bash | 1 + lib/kokkos/gnu_generate_makefile.bash | 1 + lib/kokkos/master_history.txt | 1 + lib/kokkos/simd/src/Kokkos_SIMD.hpp | 87 +- lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp | 2708 ++++++------ lib/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp | 3614 ++++++++++------- lib/kokkos/simd/src/Kokkos_SIMD_Common.hpp | 234 +- .../simd/src/Kokkos_SIMD_Common_Math.hpp | 222 +- lib/kokkos/simd/src/Kokkos_SIMD_NEON.hpp | 2788 +++++++------ lib/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp | 581 ++- .../unit_tests/include/SIMDTesting_Ops.hpp | 385 +- .../include/SIMDTesting_Utilities.hpp | 84 +- .../unit_tests/include/TestSIMD_Condition.hpp | 8 +- .../include/TestSIMD_Construction.hpp | 51 +- .../include/TestSIMD_Conversions.hpp | 78 +- .../include/TestSIMD_GeneratorCtors.hpp | 36 +- 
.../unit_tests/include/TestSIMD_MaskOps.hpp | 8 +- .../unit_tests/include/TestSIMD_MathOps.hpp | 106 +- .../include/TestSIMD_Reductions.hpp | 93 +- .../unit_tests/include/TestSIMD_ShiftOps.hpp | 90 +- .../include/TestSIMD_WhereExpressions.hpp | 146 +- .../include/desul/atomics/Fetch_Op_HIP.hpp | 6 + .../__p0009_bits/compressed_pair.hpp | 50 +- .../experimental/__p0009_bits/config.hpp | 189 +- .../__p0009_bits/default_accessor.hpp | 2 +- .../__p0009_bits/dynamic_extent.hpp | 2 +- .../experimental/__p0009_bits/extents.hpp | 41 +- .../__p0009_bits/full_extent_t.hpp | 2 +- .../experimental/__p0009_bits/layout_left.hpp | 30 +- .../__p0009_bits/layout_right.hpp | 32 +- .../__p0009_bits/layout_stride.hpp | 82 +- .../experimental/__p0009_bits/macros.hpp | 344 +- .../experimental/__p0009_bits/mdspan.hpp | 92 +- .../__p0009_bits/no_unique_address.hpp | 14 +- .../__p0009_bits/trait_backports.hpp | 14 +- .../experimental/__p0009_bits/type_list.hpp | 3 +- .../experimental/__p0009_bits/utility.hpp | 2 +- .../experimental/__p1684_bits/mdarray.hpp | 64 +- .../__p2630_bits/strided_slice.hpp | 6 +- .../__p2630_bits/submdspan_mapping.hpp | 16 +- 384 files changed, 13243 insertions(+), 9477 deletions(-) create mode 100644 lib/kokkos/CTestConfig.cmake create mode 100644 lib/kokkos/algorithms/perf_test/CMakeLists.txt create mode 100644 lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp delete mode 100644 lib/kokkos/cmake/intel.cmake create mode 100644 lib/kokkos/core/perf_test/test_reduction.cpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.cpp create mode 100644 lib/kokkos/core/src/HIP/Kokkos_HIP_IsXnack.hpp create mode 100644 lib/kokkos/core/src/View/Kokkos_ViewAccessPreconditionsCheck.hpp create mode 100644 lib/kokkos/core/src/View/Kokkos_ViewCommonType.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_GraphNodeThenImpl.hpp rename lib/kokkos/core/unit_test/{TestInit.hpp => KokkosExecutionEnvironmentNeverInitializedFixture.hpp} (55%) create mode 100644 
lib/kokkos/core/unit_test/TestExecutionEnvironmentNonInitializedOrFinalized.cpp create mode 100644 lib/kokkos/core/unit_test/TestGraphAtomicLocks.hpp create mode 100644 lib/kokkos/core/unit_test/TestInitializeFinalize.cpp create mode 100644 lib/kokkos/core/unit_test/TestPushFinalizeHook.cpp rename lib/kokkos/core/unit_test/{UnitTest_ScopeGuard.cpp => TestScopeGuard.cpp} (76%) delete mode 100644 lib/kokkos/core/unit_test/UnitTest_PushFinalizeHook.cpp delete mode 100644 lib/kokkos/core/unit_test/UnitTest_PushFinalizeHook_terminate.cpp create mode 100644 lib/kokkos/core/unit_test/hip/TestHIP_InterOp_StreamsMultiGPU.cpp diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 84bbd03585..7d39bd36ae 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,72 @@ # CHANGELOG +## 4.6.00 + +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.01...4.6.00) + +### Features: + +* Kokkos::Graph: Allow adding tasks to the graph via a `then`-node [\#7629](https://github.com/kokkos/kokkos/pull/7629) +* Kokkos::Graph: Allow construction from CUDA/HIP graph [\#7664](https://github.com/kokkos/kokkos/pull/7664) +* HIP: Add experimental support for using multiple GPUs from one process [\#7130](https://github.com/kokkos/kokkos/pull/7130) + +### Backend and Architecture Enhancements: + +#### CUDA: +* Improved reduction performance, in particular on H100 and newer [\#7823](https://github.com/kokkos/kokkos/pull/7823) + +#### HIP: +* Change block size deduction to prefer smaller blocks/teams [\#7509](https://github.com/kokkos/kokkos/pull/7509) +* Allocate memory with stream ordered semantics (i.e. 
use `hipMallocAsync`) [\#7659](https://github.com/kokkos/kokkos/pull/7659) +* Fix a segfault when a virtual function called inside a kernel requires too many registers[\#7660](https://github.com/kokkos/kokkos/pull/7660) + +#### SYCL: +* Improve sorting performance for non-contiguous views [\#7502](https://github.com/kokkos/kokkos/pull/7502) + +#### Serial: +* Reduce fences overhead when using `Kokkos_ENABLE_ATOMICS_BYPASS` [\#7821](https://github.com/kokkos/kokkos/pull/7821) + +### General Enhancements +* Allow use of `kokkos_check` in `Config.cmake` without warnings [\#7669](https://github.com/kokkos/kokkos/pull/7669) +* Add simd compound assignments and update simd reductions [\#7486](https://github.com/kokkos/kokkos/pull/7486) +* Improve performance of the `inclusive_scan` algorithm with Cuda and HIP [\#7542](https://github.com/kokkos/kokkos/pull/7542) +* Reduce tooling interface overhead (don't pay for what you don't use) [\#7817](https://github.com/kokkos/kokkos/pull/7817) +* Avoid storing the view in `RandomAccessIterator` to increase performance [\#7304](https://github.com/kokkos/kokkos/pull/7304) +* Make `RandomAccessIterator` fulfill `std::random_access_iterator concept` [\#7451](https://github.com/kokkos/kokkos/pull/7451) +* Include information about support for system allocated memory in `print_configuration` (Cuda and HIP) [\#7673](https://github.com/kokkos/kokkos/pull/7673) + +### Build System Changes +* Add support for Zen 4 AMD microarchitecture [\#7550](https://github.com/kokkos/kokkos/pull/7550) +* Enable NVIDIA Grace architecture with NVHPC [\#7858](https://github.com/kokkos/kokkos/pull/7858) +* Support static library builds when using CUDA as CMake language [\#7830](https://github.com/kokkos/kokkos/pull/7830) + +### Incompatibilities (i.e. 
breaking changes) +* Change SIMD comparison operator to return `simd_mask` instead of `bool` [\#7781](https://github.com/kokkos/kokkos/pull/7781) +* Remove classic Intel compiler (icpc) support [\#7737](https://github.com/kokkos/kokkos/pull/7737) +* Remove `operator[]` overloads of Kokkos `basic_simd` and `basic_simd_mask` that return a reference [\#7630](https://github.com/kokkos/kokkos/pull/7630) + +### Deprecations +* Deprecate `StaticCrsGraph` and move it to Kokkos Kernels into `KokkosSparse::` [\#7516](https://github.com/kokkos/kokkos/pull/7516) +* Deprecate `native_simd` and hide `simd_abi` [\#7472](https://github.com/kokkos/kokkos/pull/7472) +* Deprecate Makefile support [\#7613](https://github.com/kokkos/kokkos/pull/7613) +* DualView: Deprecate direct access to d_view and h_view [\#7716](https://github.com/kokkos/kokkos/pull/7716) + +### Bug Fixes +* Fix performance bug affecting `atomic_fetch_{add,sub,min,max,and,or,xor}` on integral types `long` and `unsigned long` with HIP [\#7816](https://github.com/kokkos/kokkos/pull/7816) +* Fix execution of ranges with more than 2B elements [\#7797](https://github.com/kokkos/kokkos/pull/7797) +* Fix clean target when embedding Kokkos in another project [\#7557](https://github.com/kokkos/kokkos/pull/7557) +* Fix Zen3 flag for NVHPC [\#7558](https://github.com/kokkos/kokkos/pull/7558) +* graph: nodes must be stored by the graph [\#7619](https://github.com/kokkos/kokkos/pull/7619) +* Make sure lock arrays are on device before launching a graph [\#7685](https://github.com/kokkos/kokkos/pull/7685) +* Performance bug in `RangePolicy`: construct error message if and only if the precondition is violated [\#7809](https://github.com/kokkos/kokkos/pull/7809) +* simd: fix a bug in scalar min/max [\#7813](https://github.com/kokkos/kokkos/pull/7813) +* simd: fix a bug in non-masked reductions [\#7845](https://github.com/kokkos/kokkos/pull/7845) +* Cuda: fix incorrect iteration in `MDRangePolicy` of rank > 4 for high iteration 
counts [\#7724](https://github.com/kokkos/kokkos/pull/7724) +* Cuda: ignore gcc assembler options in `nvcc-wrapper` [\#7492](https://github.com/kokkos/kokkos/pull/7492) +* Build system: hint to `ARCH_NATIVE` if ARMv9 Grace arch is not explicitly supported by the compiler [\#7862](https://github.com/kokkos/kokkos/pull/7862) +* Use right arch for MI300A in makefiles [\#7786](https://github.com/kokkos/kokkos/pull/7786) +* Fix compiling BasicView on MSVC [\#7751](https://github.com/kokkos/kokkos/pull/7751) + ## 4.5.01 [Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 6a70bea149..7a4dc73444 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -148,8 +148,8 @@ elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) endif() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 5) -set(Kokkos_VERSION_PATCH 1) +set(Kokkos_VERSION_MINOR 6) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/lib/kokkos/CTestConfig.cmake b/lib/kokkos/CTestConfig.cmake new file mode 100644 index 0000000000..deb80ab76a --- /dev/null +++ b/lib/kokkos/CTestConfig.cmake @@ -0,0 +1,4 @@ +set(CTEST_PROJECT_NAME Kokkos) +set(CTEST_NIGHTLY_START_TIME 01:00:00 UTC) +set(CTEST_SUBMIT_URL https://my.cdash.org/submit.php?project=Kokkos) +set(CTEST_DROP_SITE_CDASH TRUE) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index abdfb7a316..65c576bb8d 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -1,18 +1,26 @@ # Default settings common options. 
-#SPARTA specific settings: +#LAMMPS specific settings: + +KOKKOS_USE_DEPRECATED_MAKEFILES=1 + ifndef KOKKOS_PATH KOKKOS_PATH=../../lib/kokkos endif CXXFLAGS=$(CCFLAGS) ifeq ($(mode),shared) -CXXFLAGS += $(SHFLAGS) + CXXFLAGS += $(SHFLAGS) +endif + + +ifneq ($(KOKKOS_USE_DEPRECATED_MAKEFILES), 1) + $(error Makefile support is deprecated. Only CMake builds will be supported from Kokkos 5 on. Set KOKKOS_USE_DEPRECATED_MAKEFILES=1 to silence this error.) endif KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 5 -KOKKOS_VERSION_PATCH = 1 +KOKKOS_VERSION_MINOR = 6 +KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -24,7 +32,7 @@ KOKKOS_DEVICES ?= "OpenMP" # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # IBM: Power8,Power9 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 -# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 +# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3,Zen4 # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" # Options: yes,no @@ -442,11 +450,14 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ # AMD based. 
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) +KOKKOS_INTERNAL_USE_ARCH_ZEN4 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen4) KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3) KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0) - KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 0) + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0) + KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) + endif endif endif @@ -463,8 +474,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A) endif KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 0) + KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) +endif KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) @@ -857,6 +870,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN4") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX512 + KOKKOS_LDFLAGS += -xCORE-AVX512 + else + KOKKOS_CXXFLAGS += -march=znver4 -mtune=znver4 + 
KOKKOS_LDFLAGS += -march=znver4 -mtune=znver4 + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md index 56159b35c2..13d99c0bad 100644 --- a/lib/kokkos/README.md +++ b/lib/kokkos/README.md @@ -18,24 +18,24 @@ Kokkos is a [Linux Foundation](https://linuxfoundation.org) project. To start learning about Kokkos: -- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. +- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/video-lectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. - [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. - [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html). -- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a serie of examples ranging from how to use Kokkos with MPI to Fortran interoperability. +- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/use-cases-and-examples.html): a serie of examples ranging from how to use Kokkos with MPI to Fortran interoperability. 
## Obtaining Kokkos The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). -The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01). +The current release is [4.6.00](https://github.com/kokkos/kokkos/releases/tag/4.6.00). ```bash -curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz +curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz # Or with wget -wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz +wget https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz ``` To clone the latest development version of Kokkos from GitHub: @@ -47,7 +47,7 @@ git clone -b develop https://github.com/kokkos/kokkos.git ### Building Kokkos To build Kokkos, you will need to have a C++ compiler that supports C++17 or later. -All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html). +All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/get-started/requirements.html). Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html). 
diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt index 73ce9f7ec5..e257e4ccce 100644 --- a/lib/kokkos/algorithms/CMakeLists.txt +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -5,3 +5,7 @@ endif() if(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) kokkos_add_test_directories(unit_tests) endif() + +if(Kokkos_ENABLE_BENCHMARKS) + add_subdirectory(perf_test) +endif() diff --git a/lib/kokkos/algorithms/perf_test/CMakeLists.txt b/lib/kokkos/algorithms/perf_test/CMakeLists.txt new file mode 100644 index 0000000000..a41d3f891b --- /dev/null +++ b/lib/kokkos/algorithms/perf_test/CMakeLists.txt @@ -0,0 +1,63 @@ +# FIXME: The following logic should be moved from here and also from `core/perf_test/CMakeLists.txt` to +# the root `CMakeLists.txt` in the form of a macro +# Find or download google/benchmark library +find_package(benchmark QUIET 1.5.6) +if(benchmark_FOUND) + message(STATUS "Using google benchmark found in ${benchmark_DIR}") +else() + message(STATUS "No installed google benchmark found, fetching from GitHub") + include(FetchContent) + set(BENCHMARK_ENABLE_TESTING OFF) + + list(APPEND CMAKE_MESSAGE_INDENT "[benchmark] ") + FetchContent_Declare( + googlebenchmark + DOWNLOAD_EXTRACT_TIMESTAMP FALSE + URL https://github.com/google/benchmark/archive/refs/tags/v1.7.1.tar.gz + URL_HASH MD5=0459a6c530df9851bee6504c3e37c2e7 + ) + FetchContent_MakeAvailable(googlebenchmark) + list(POP_BACK CMAKE_MESSAGE_INDENT) + + # Suppress clang-tidy diagnostics on code that we do not have control over + if(CMAKE_CXX_CLANG_TIDY) + set_target_properties(benchmark PROPERTIES CXX_CLANG_TIDY "") + endif() + + # FIXME: Check whether the following target_compile_options are needed. + # If so, clarify why. 
+ target_compile_options(benchmark PRIVATE -w) + target_compile_options(benchmark_main PRIVATE -w) +endif() + +# FIXME: This function should be moved from here and also from `core/perf_test/CMakeLists.txt` to +# the root `CMakeLists.txt` +# FIXME: Could NAME be a one_value_keyword specified in cmake_parse_arguments? +function(KOKKOS_ADD_BENCHMARK NAME) + cmake_parse_arguments(BENCHMARK "" "" "SOURCES" ${ARGN}) + if(DEFINED BENCHMARK_UNPARSED_ARGUMENTS) + message(WARNING "Unexpected arguments when adding a benchmark: " ${BENCHMARK_UNPARSED_ARGUMENTS}) + endif() + + set(BENCHMARK_NAME Kokkos_${NAME}) + # FIXME: BenchmarkMain.cpp and Benchmark_Context.cpp should be moved to a common location from which + # they can be used by all performance tests. + list(APPEND BENCHMARK_SOURCES ../../core/perf_test/BenchmarkMain.cpp ../../core/perf_test/Benchmark_Context.cpp) + + add_executable(${BENCHMARK_NAME} ${BENCHMARK_SOURCES}) + target_link_libraries(${BENCHMARK_NAME} PRIVATE benchmark::benchmark Kokkos::kokkos impl_git_version) + target_include_directories(${BENCHMARK_NAME} SYSTEM PRIVATE ${benchmark_SOURCE_DIR}/include) + + # FIXME: This alone will not work. It might need an architecture and standard which need to be defined on target level. + # It will potentially go away with #7582. 
+ foreach(SOURCE_FILE ${BENCHMARK_SOURCES}) + set_source_files_properties(${SOURCE_FILE} PROPERTIES LANGUAGE ${KOKKOS_COMPILE_LANGUAGE}) + endforeach() + + string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC) + set(BENCHMARK_ARGS --benchmark_counters_tabular=true --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json) + + add_test(NAME ${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS}) +endfunction() + +kokkos_add_benchmark(PerformanceTest_InclusiveScan SOURCES test_inclusive_scan.cpp) diff --git a/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp b/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp new file mode 100644 index 0000000000..a0a5de6b07 --- /dev/null +++ b/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp @@ -0,0 +1,191 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include + +#include + +#include +#include +#include +// FIXME: Benchmark_Context.hpp should be moved to a common location +#include "../../core/perf_test/Benchmark_Context.hpp" + +namespace { + +namespace KE = Kokkos::Experimental; + +using ExecSpace = Kokkos::DefaultExecutionSpace; +using HostExecSpace = Kokkos::DefaultHostExecutionSpace; + +// A tag struct to identify when inclusive scan with the implicit sum +// based binary operation needs to be called. 
+template +struct ImpSumBinOp; + +template +struct SumFunctor { + KOKKOS_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + return (a + b); + } +}; + +template +struct MaxFunctor { + KOKKOS_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + if (a > b) + return a; + else + return b; + } +}; + +// Helper to obtain last element of a view +template +T obtain_last_elem(const Kokkos::View& v) { + T last_element; + Kokkos::deep_copy(last_element, Kokkos::subview(v, v.extent(0) - 1)); + return last_element; +} + +// Helper to allocate input and output views +template +auto prepare_views(const std::size_t kProbSize) { + Kokkos::View in{"input", kProbSize}; + Kokkos::View out{"output", kProbSize}; + + auto h_in = Kokkos::create_mirror_view(in); + + for (std::size_t i = 0; i < kProbSize; ++i) { + h_in(i) = i; + } + + Kokkos::deep_copy(in, h_in); + + return std::make_tuple(in, out, h_in); +} + +// Perform scan with a reference implementation +template > +T ref_scan(const ViewType& h_in, ScanFunctor scan_functor = ScanFunctor()) { + std::size_t view_size = h_in.extent(0); + + Kokkos::View h_out("output", view_size); + + // FIXME: We have a GCC 8.4.0 based check in our ORNL Jenkins CI. + // std::inclusive_scan is available only from GCC 9.3. Since GCC 9.1, the + // std::inclusive_scan overload that takes an execution policy is available. + // However, there is an error with the <execution> header before GCC 10.1. 
+ h_out(0) = h_in(0); + + for (std::size_t i = 1; i < view_size; ++i) { + h_out(i) = scan_functor(h_in(i), h_out(i - 1)); + } + + return h_out(view_size - 1); +} + +// Inclusive Scan with default binary operation (sum) or user provided functor +// Note: The nature of the functor must be compatible with the +// elements in the input and output views +template class ScanFunctor = ImpSumBinOp> +auto inclusive_scan(const Kokkos::View& in, + const Kokkos::View& out, T res_check) { + ExecSpace().fence(); + Kokkos::Timer timer; + + if constexpr (std::is_same_v, ImpSumBinOp>) { + KE::inclusive_scan("Default scan", ExecSpace(), KE::cbegin(in), + KE::cend(in), KE::begin(out)); + } else { + KE::inclusive_scan("Scan using a functor", ExecSpace(), KE::cbegin(in), + KE::cend(in), KE::begin(out), ScanFunctor()); + } + + ExecSpace().fence(); + double time_scan = timer.seconds(); + + T res_scan = obtain_last_elem(out); + bool passed = (res_check == res_scan); + + return std::make_tuple(time_scan, passed); +} + +// Benchmark: Inclusive Scan with default binary operation (sum) +// or user provided functor +template class ScanFunctor = ImpSumBinOp> +void BM_inclusive_scan(benchmark::State& state) { + const std::size_t kProbSize = state.range(0); + + auto [in, out, h_in] = prepare_views(kProbSize); + + T res_check; + + if constexpr (std::is_same_v, ImpSumBinOp>) { + res_check = ref_scan(h_in); + } else { + res_check = ref_scan(h_in, ScanFunctor()); + } + + double time_scan = 0.; + bool passed = false; + + for (auto _ : state) { + if constexpr (std::is_same_v, ImpSumBinOp>) { + std::tie(time_scan, passed) = inclusive_scan(in, out, res_check); + } else { + std::tie(time_scan, passed) = + inclusive_scan(in, out, res_check); + } + + KokkosBenchmark::report_results(state, in, 2, time_scan); + state.counters["Passed"] = passed; + } +} + +constexpr std::size_t PROB_SIZE = 100'000'000; + +} // anonymous namespace + +// FIXME: Add logic to pass min. warm-up time. 
Also, the value should be set +// by the user. Say, via the environment variable BENCHMARK_MIN_WARMUP_TIME. + +BENCHMARK(BM_inclusive_scan)->Arg(PROB_SIZE)->UseManualTime(); +BENCHMARK(BM_inclusive_scan)->Arg(PROB_SIZE)->UseManualTime(); +BENCHMARK(BM_inclusive_scan)->Arg(PROB_SIZE)->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index b28ea4c2ca..54a853fa55 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -587,11 +587,13 @@ struct Random_XorShift1024_State { int state_idx) : state_(&v(state_idx, 0)), stride_(v.stride_1()) {} + // NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result) KOKKOS_FUNCTION uint64_t operator[](const int i) const { return state_[i * stride_]; } KOKKOS_FUNCTION uint64_t& operator[](const int i) { return state_[i * stride_]; } + // NOLINTEND(bugprone-implicit-widening-of-multiplication-result) }; template @@ -670,7 +672,12 @@ struct Random_UniqueIndex> { View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { +#if defined(KOKKOS_COMPILER_INTEL_LLVM) && \ + KOKKOS_COMPILER_INTEL_LLVM >= 20250000 + auto item = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); +#else auto item = sycl::ext::oneapi::experimental::this_nd_item<3>(); +#endif std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1), item.get_local_id(0)}; std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1), diff --git 
a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp index 8e7de32a07..b093b72ad6 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp @@ -45,7 +45,7 @@ struct BinOp1D { // For integral types the number of bins may be larger than the range // in which case we can exactly have one unique value per bin // and then don't need to sort bins. - if (std::is_integral::value && + if (std::is_integral_v && (static_cast(max) - static_cast(min)) <= static_cast(max_bins)) { mul_ = 1.; diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp index 20026c77e4..308e9e3a00 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp @@ -53,13 +53,9 @@ void sort(const ExecutionSpace& exec, if constexpr (Impl::better_off_calling_std_sort_v) { exec.fence("Kokkos::sort without comparator use std::sort"); - if (view.span_is_contiguous()) { - std::sort(view.data(), view.data() + view.size()); - } else { - auto first = ::Kokkos::Experimental::begin(view); - auto last = ::Kokkos::Experimental::end(view); - std::sort(first, last); - } + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last); } else { Impl::sort_device_view_without_comparator(exec, view); } @@ -111,13 +107,9 @@ void sort(const ExecutionSpace& exec, if constexpr (Impl::better_off_calling_std_sort_v) { exec.fence("Kokkos::sort with comparator use std::sort"); - if (view.span_is_contiguous()) { - std::sort(view.data(), view.data() + view.size(), comparator); - } else { - auto first = ::Kokkos::Experimental::begin(view); - auto last = ::Kokkos::Experimental::end(view); - std::sort(first, last, comparator); - } + auto first = 
::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last, comparator); } else { Impl::sort_device_view_with_comparator(exec, view, comparator); } diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp index 2a8f761d9b..f17d254b0b 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp @@ -47,6 +47,7 @@ #ifdef _CubLog #undef _CubLog #endif +// NOLINTNEXTLINE(bugprone-reserved-identifier) #define _CubLog #include #include @@ -65,12 +66,24 @@ #include #endif -#if defined(KOKKOS_ENABLE_ONEDPL) && \ - (ONEDPL_VERSION_MAJOR > 2022 || \ - (ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2)) -#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_ENABLE_ONEDPL +#define KOKKOS_IMPL_ONEDPL_VERSION \ + ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \ + ONEDPL_VERSION_PATCH +#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \ + (KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH))) + +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 2, 0) +#define KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wunused-local-typedef" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wunused-variable" #include #include +#pragma GCC diagnostic pop +#endif #endif namespace Kokkos::Impl { @@ -141,12 +154,18 @@ void sort_by_key_rocthrust( #endif #if defined(KOKKOS_ENABLE_ONEDPL) + +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) +template +inline constexpr bool sort_on_device_v = true; +#else template inline constexpr bool sort_on_device_v = std::is_same_v || std::is_same_v; +#endif -#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY template void 
sort_by_key_onedpl( @@ -154,6 +173,14 @@ void sort_by_key_onedpl( const Kokkos::View& keys, const Kokkos::View& values, MaybeComparator&&... maybeComparator) { + auto queue = exec.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + oneapi::dpl::sort_by_key(policy, ::Kokkos::Experimental::begin(keys), + ::Kokkos::Experimental::end(keys), + ::Kokkos::Experimental::begin(values), + std::forward(maybeComparator)...); +#else if (keys.stride(0) != 1 && values.stride(0) != 1) { Kokkos::abort( "SYCL sort_by_key only supports rank-1 Views with stride(0) = 1."); @@ -161,11 +188,10 @@ void sort_by_key_onedpl( // Can't use Experimental::begin/end here since the oneDPL then assumes that // the data is on the host. - auto queue = exec.sycl_queue(); - auto policy = oneapi::dpl::execution::make_device_policy(queue); const int n = keys.extent(0); oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(), std::forward(maybeComparator)...); +#endif } #endif #endif @@ -336,12 +362,18 @@ void sort_by_key_device_view_without_comparator( const Kokkos::SYCL& exec, const Kokkos::View& keys, const Kokkos::View& values) { -#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_by_key_onedpl(exec, keys, values); +#else if (keys.stride(0) == 1 && values.stride(0) == 1) sort_by_key_onedpl(exec, keys, values); else -#endif sort_by_key_via_sort(exec, keys, values); +#endif +#else + sort_by_key_via_sort(exec, keys, values); +#endif } #endif @@ -394,12 +426,18 @@ void sort_by_key_device_view_with_comparator( const Kokkos::View& keys, const Kokkos::View& values, const ComparatorType& comparator) { -#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_by_key_onedpl(exec, keys, values, comparator); +#else if 
(keys.stride(0) == 1 && values.stride(0) == 1) sort_by_key_onedpl(exec, keys, values, comparator); else -#endif sort_by_key_via_sort(exec, keys, values, comparator); +#endif +#else + sort_by_key_via_sort(exec, keys, values, comparator); +#endif } #endif @@ -416,7 +454,9 @@ sort_by_key_device_view_with_comparator( sort_by_key_via_sort(exec, keys, values, comparator); } -#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#undef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY } // namespace Kokkos::Impl +#undef KOKKOS_IMPL_ONEDPL_VERSION +#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL #endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp index 734ce450f6..fa7c28b4d0 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -51,6 +51,7 @@ #ifdef _CubLog #undef _CubLog #endif +// NOLINTNEXTLINE(bugprone-reserved-identifier) #define _CubLog #include #include @@ -70,8 +71,20 @@ #endif #if defined(KOKKOS_ENABLE_ONEDPL) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wunused-local-typedef" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wunused-variable" #include #include +#pragma GCC diagnostic pop + +#define KOKKOS_IMPL_ONEDPL_VERSION \ + ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \ + ONEDPL_VERSION_PATCH +#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \ + (KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH))) #endif namespace Kokkos { @@ -221,6 +234,10 @@ void sort_onedpl(const Kokkos::SYCL& space, "SYCL execution space is not able to access the memory space " "of the View argument!"); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + static_assert(ViewType::rank == 1, + "Kokkos::sort currently only supports rank-1 Views."); +#else static_assert( (ViewType::rank == 1) 
&& (std::is_same_v || @@ -234,18 +251,26 @@ void sort_onedpl(const Kokkos::SYCL& space, if (view.stride(0) != 1) { Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); } +#endif if (view.extent(0) <= 1) { return; } - // Can't use Experimental::begin/end here since the oneDPL then assumes that - // the data is on the host. auto queue = space.sycl_queue(); auto policy = oneapi::dpl::execution::make_device_policy(queue); + +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + oneapi::dpl::sort(policy, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), + std::forward(maybeComparator)...); +#else + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. const int n = view.extent(0); oneapi::dpl::sort(policy, view.data(), view.data() + n, std::forward(maybeComparator)...); +#endif } #endif @@ -269,29 +294,19 @@ void copy_to_host_run_stdsort_copy_back( KE::copy(exec, view, view_dc); // run sort on the mirror of view_dc - auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); - if (view.span_is_contiguous()) { - std::sort(mv_h.data(), mv_h.data() + mv_h.size(), - std::forward(maybeComparator)...); - } else { - auto first = KE::begin(mv_h); - auto last = KE::end(mv_h); - std::sort(first, last, std::forward(maybeComparator)...); - } + auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); + auto first = KE::begin(mv_h); + auto last = KE::end(mv_h); + std::sort(first, last, std::forward(maybeComparator)...); Kokkos::deep_copy(exec, view_dc, mv_h); // copy back to argument view KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); } else { auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); - if (view.span_is_contiguous()) { - std::sort(view_h.data(), view_h.data() + view_h.size(), - std::forward(maybeComparator)...); - } else { - auto first = KE::begin(view_h); - auto last = KE::end(view_h); - 
std::sort(first, last, std::forward(maybeComparator)...); - } + auto first = KE::begin(view_h); + auto last = KE::end(view_h); + std::sort(first, last, std::forward(maybeComparator)...); Kokkos::deep_copy(exec, view, view_h); } } @@ -332,11 +347,15 @@ void sort_device_view_without_comparator( "sort_device_view_without_comparator: supports rank-1 Views " "with LayoutLeft, LayoutRight or LayoutStride"); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_onedpl(exec, view); +#else if (view.stride(0) == 1) { sort_onedpl(exec, view); } else { copy_to_host_run_stdsort_copy_back(exec, view); } +#endif } #endif @@ -387,11 +406,15 @@ void sort_device_view_with_comparator( "sort_device_view_with_comparator: supports rank-1 Views " "with LayoutLeft, LayoutRight or LayoutStride"); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_onedpl(exec, view, comparator); +#else if (view.stride(0) == 1) { sort_onedpl(exec, view, comparator); } else { copy_to_host_run_stdsort_copy_back(exec, view, comparator); } +#endif } #endif @@ -423,4 +446,7 @@ sort_device_view_with_comparator( } // namespace Impl } // namespace Kokkos + +#undef KOKKOS_IMPL_ONEDPL_VERSION +#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL #endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index da16141f5a..2e73ace8d5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -238,12 +238,9 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap( [[maybe_unused]] IteratorType2 s_first) { if constexpr (is_kokkos_iterator_v && is_kokkos_iterator_v) { - auto const view1 = first.view(); - auto const view2 = s_first.view(); - - std::size_t stride1 = view1.stride(0); - std::size_t stride2 = view2.stride(0); - ptrdiff_t first_diff = view1.data() - view2.data(); + std::size_t stride1 = first.stride(); 
+ std::size_t stride2 = s_first.stride(); + ptrdiff_t first_diff = first.data() - s_first.data(); // FIXME If strides are not identical, checks may not be made // with the cost of O(1) @@ -251,8 +248,8 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap( // If first_diff == 0, there is already an overlap if (stride1 == stride2 || first_diff == 0) { [[maybe_unused]] bool is_no_overlap = (first_diff % stride1); - auto* first_pointer1 = view1.data(); - auto* first_pointer2 = view2.data(); + auto* first_pointer1 = first.data(); + auto* first_pointer2 = s_first.data(); [[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first); [[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first); KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 || diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index ad7b8bb8ca..ef39be6366 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -150,9 +150,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl( return d_first + count; } -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ - !defined(KOKKOS_COMPILER_MSVC)) +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) __builtin_unreachable(); #endif } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp index 6da992b4bb..08e04810f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -103,7 +103,7 @@ OutputIteratorType exclusive_scan_custom_op_exespace_impl( // aliases using index_type = typename InputIteratorType::difference_type; - using unary_op_type = 
StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TransformExclusiveScanFunctorWithValueWrapper< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -177,7 +177,7 @@ KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl( // aliases using exe_space = typename TeamHandleType::execution_space; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using index_type = typename InputIteratorType::difference_type; using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp index 252511c5d0..928508fdfb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp @@ -23,10 +23,11 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template struct StdNumericScanIdentityReferenceUnaryFunctor { - KOKKOS_FUNCTION - constexpr const ValueType& operator()(const ValueType& a) const { return a; } + template + KOKKOS_FUNCTION constexpr T&& operator()(T&& t) const { + return static_cast(t); + } }; } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index 0b4acec0fe..867d0b0266 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -18,12 +18,60 @@ #define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP #include 
+#include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include #include +#if defined(KOKKOS_ENABLE_CUDA) + +// Workaround for `Instruction 'shfl' without '.sync' is not supported on +// .target sm_70 and higher from PTX ISA version 6.4`. +// Also see https://github.com/NVIDIA/cub/pull/170. +#if !defined(CUB_USE_COOPERATIVE_GROUPS) +#define CUB_USE_COOPERATIVE_GROUPS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wsuggest-override" + +#if defined(KOKKOS_COMPILER_CLANG) +// Some versions of Clang fail to compile Thrust, failing with errors like +// this: +// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: +// error: use of undeclared identifier 'va_printf' +// The exact combination of versions for Clang and Thrust (or CUDA) for this +// failure was not investigated, however even very recent version combination +// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. +// +// Defining _CubLog here locally allows us to avoid that code path, however +// disabling some debugging diagnostics +#pragma push_macro("_CubLog") +#ifdef _CubLog +#undef _CubLog +#endif +// NOLINTNEXTLINE(bugprone-reserved-identifier) +#define _CubLog +#include +#include +#pragma pop_macro("_CubLog") +#else +#include +#include +#endif + +#pragma GCC diagnostic pop + +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +#include +#include +#endif + namespace Kokkos { namespace Experimental { namespace Impl { @@ -101,9 +149,48 @@ struct InclusiveScanDefaultFunctor { } }; -// -// exespace impl -// +// ------------------------------------------------------------- +// inclusive_scan_default_op_exespace_impl +// ------------------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +OutputIteratorType inclusive_scan_default_op_exespace_impl( + const std::string& label, const Cuda& ex, InputIteratorType first_from, + InputIteratorType last_from, 
OutputIteratorType first_dest) { + const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +OutputIteratorType inclusive_scan_default_op_exespace_impl( + const std::string& label, const HIP& ex, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + const auto thrust_ex = thrust::hip::par.on(ex.hip_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + template OutputIteratorType inclusive_scan_default_op_exespace_impl( @@ -132,11 +219,16 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); + + Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan"); + ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest)); ex.fence("Kokkos::inclusive_scan_default_op: fence after operation"); + Kokkos::Profiling::popRegion(); + // return return first_dest + num_elements; } @@ -144,6 +236,49 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl( // ------------------------------------------------------------- // inclusive_scan_custom_binary_op_impl // ------------------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( + const std::string& label, const Cuda& ex, 
InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest, + binary_op); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( + const std::string& label, const HIP& ex, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + const auto thrust_ex = thrust::hip::par.on(ex.hip_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest, + binary_op); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + template OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( @@ -160,7 +295,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( using index_type = typename InputIteratorType::difference_type; using value_type = std::remove_const_t; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -168,11 +303,16 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); + + Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan"); + 
::Kokkos::parallel_scan( label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type())); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); + Kokkos::Profiling::popRegion(); + // return return first_dest + num_elements; } @@ -195,7 +335,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( // aliases using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -203,12 +343,17 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); + + Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan"); + ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type(), std::move(init_value))); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); + Kokkos::Profiling::popRegion(); + // return return first_dest + num_elements; } @@ -283,7 +428,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( // aliases using exe_space = typename TeamHandleType::execution_space; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TeamTransformInclusiveScanNoInitValueFunctor< exe_space, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -291,7 +436,6 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); - 
::Kokkos::parallel_scan( TeamThreadRange(teamHandle, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type())); @@ -325,7 +469,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( // aliases using exe_space = typename TeamHandleType::execution_space; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TeamTransformInclusiveScanWithInitValueFunctor< exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp index e8c638c94c..c504673c3d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp @@ -18,6 +18,7 @@ #define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP #include +#include // declval #include #include #include "Kokkos_Constraints.hpp" @@ -29,8 +30,29 @@ namespace Impl { template class RandomAccessIterator; +namespace { + +template +struct is_always_strided { + static_assert(is_view_v); + + constexpr static bool value = +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN + decltype(std::declval().to_mdspan())::is_always_strided(); +#else + (std::is_same_v || + std::is_same_v || + std::is_same_v); +#endif +}; + +} // namespace + template -class RandomAccessIterator< ::Kokkos::View > { +class RandomAccessIterator<::Kokkos::View> { public: using view_type = ::Kokkos::View; using iterator_type = RandomAccessIterator; @@ -41,30 +63,31 @@ class RandomAccessIterator< ::Kokkos::View > { using pointer = typename view_type::pointer_type; using reference = typename view_type::reference_type; +// oneDPL needs this alias in order not to assume the data is on the host but on +// the device, see +// 
https://github.com/uxlfoundation/oneDPL/blob/a045eac689f9107f50ba7b42235e9e927118e483/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h#L210-L214 +#ifdef KOKKOS_ENABLE_ONEDPL + using is_passed_directly = std::true_type; +#endif + static_assert(view_type::rank == 1 && - (std::is_same_v || - std::is_same_v || - std::is_same_v), - "RandomAccessIterator only supports 1D Views with LayoutLeft, " - "LayoutRight, LayoutStride."); + is_always_strided<::Kokkos::View>::value); KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default; explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view) - : m_view(view) {} + : m_data(view.data()), m_stride(view.stride_0()) {} explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view, ptrdiff_t current_index) - : m_view(view), m_current_index(current_index) {} + : m_data(view.data() + current_index * view.stride_0()), + m_stride(view.stride_0()) {} #ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond template requires(std::is_constructible_v) KOKKOS_FUNCTION explicit(!std::is_convertible_v) RandomAccessIterator(const RandomAccessIterator& other) - : m_view(other.m_view), m_current_index(other.m_current_index) {} + : m_data(other.m_data), m_stride(other.m_stride) {} #else template < class OtherViewType, @@ -73,19 +96,22 @@ class RandomAccessIterator< ::Kokkos::View > { int> = 0> KOKKOS_FUNCTION explicit RandomAccessIterator( const RandomAccessIterator& other) - : m_view(other.m_view), m_current_index(other.m_current_index) {} + : m_data(other.m_data), m_stride(other.m_stride) {} template , int> = 0> KOKKOS_FUNCTION RandomAccessIterator( const RandomAccessIterator& other) - : m_view(other.m_view), m_current_index(other.m_current_index) {} + : m_data(other.m_data), m_stride(other.m_stride) {} #endif KOKKOS_FUNCTION iterator_type& operator++() { - ++m_current_index; + if constexpr (is_always_contiguous) + m_data++; + else + m_data += m_stride; return *this; } @@ -98,7 +124,10 @@ class RandomAccessIterator< 
::Kokkos::View > { KOKKOS_FUNCTION iterator_type& operator--() { - --m_current_index; + if constexpr (is_always_contiguous) + m_data--; + else + m_data -= m_stride; return *this; } @@ -111,77 +140,95 @@ class RandomAccessIterator< ::Kokkos::View > { KOKKOS_FUNCTION reference operator[](difference_type n) const { - return m_view(m_current_index + n); + if constexpr (is_always_contiguous) + return *(m_data + n); + else + return *(m_data + n * m_stride); } KOKKOS_FUNCTION iterator_type& operator+=(difference_type n) { - m_current_index += n; + if constexpr (is_always_contiguous) + m_data += n; + else + m_data += n * m_stride; return *this; } KOKKOS_FUNCTION iterator_type& operator-=(difference_type n) { - m_current_index -= n; + if constexpr (is_always_contiguous) + m_data -= n; + else + m_data -= n * m_stride; return *this; } KOKKOS_FUNCTION iterator_type operator+(difference_type n) const { - return iterator_type(m_view, m_current_index + n); + auto it = *this; + it += n; + return it; + } + + friend iterator_type operator+(difference_type n, iterator_type other) { + return other + n; } KOKKOS_FUNCTION iterator_type operator-(difference_type n) const { - return iterator_type(m_view, m_current_index - n); + auto it = *this; + it -= n; + return it; } KOKKOS_FUNCTION difference_type operator-(iterator_type it) const { - return m_current_index - it.m_current_index; + if constexpr (is_always_contiguous) + return m_data - it.m_data; + else + return (m_data - it.m_data) / m_stride; } KOKKOS_FUNCTION bool operator==(iterator_type other) const { - return m_current_index == other.m_current_index && - m_view.data() == other.m_view.data(); + return m_data == other.m_data && m_stride == other.m_stride; } KOKKOS_FUNCTION bool operator!=(iterator_type other) const { - return m_current_index != other.m_current_index || - m_view.data() != other.m_view.data(); + return m_data != other.m_data || m_stride != other.m_stride; } KOKKOS_FUNCTION - bool operator<(iterator_type other) const { 
- return m_current_index < other.m_current_index; - } + bool operator<(iterator_type other) const { return m_data < other.m_data; } KOKKOS_FUNCTION - bool operator<=(iterator_type other) const { - return m_current_index <= other.m_current_index; - } + bool operator<=(iterator_type other) const { return m_data <= other.m_data; } KOKKOS_FUNCTION - bool operator>(iterator_type other) const { - return m_current_index > other.m_current_index; - } + bool operator>(iterator_type other) const { return m_data > other.m_data; } KOKKOS_FUNCTION - bool operator>=(iterator_type other) const { - return m_current_index >= other.m_current_index; - } + bool operator>=(iterator_type other) const { return m_data >= other.m_data; } KOKKOS_FUNCTION - reference operator*() const { return m_view(m_current_index); } + reference operator*() const { return *m_data; } KOKKOS_FUNCTION - view_type view() const { return m_view; } + pointer data() const { return m_data; } + + KOKKOS_FUNCTION + int stride() const { return m_stride; } private: - view_type m_view; - ptrdiff_t m_current_index = 0; + pointer m_data; + int m_stride; + static constexpr bool is_always_contiguous = + (std::is_same_v || + std::is_same_v); // Needed for the converting constructor accepting another iterator template @@ -192,4 +239,10 @@ class RandomAccessIterator< ::Kokkos::View > { } // namespace Experimental } // namespace Kokkos +#ifdef KOKKOS_ENABLE_SYCL +template +struct sycl::is_device_copyable< + Kokkos::Experimental::Impl::RandomAccessIterator> : std::true_type {}; +#endif + #endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp index 2863582458..75f3315473 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp @@ -52,13 +52,10 @@ struct StdUniqueFunctor { auto& val_i = m_first_from[i]; const auto& val_ip1 = m_first_from[i + 1]; - 
if (final_pass) { - if (!m_pred(val_i, val_ip1)) { + if (!m_pred(val_i, val_ip1)) { + if (final_pass) { m_first_dest[update] = std::move(val_i); } - } - - if (!m_pred(val_i, val_ip1)) { update += 1; } } @@ -188,6 +185,7 @@ KOKKOS_FUNCTION IteratorType unique_team_impl(const TeamHandleType& teamHandle, IteratorType result = first; IteratorType lfirst = first; while (++lfirst != last) { + // NOLINTNEXTLINE(bugprone-inc-dec-in-conditions) if (!pred(*result, *lfirst) && ++result != lfirst) { *result = std::move(*lfirst); } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp index 710d04805d..226fd49d16 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp @@ -175,9 +175,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl( d_first + count); } -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ - !defined(KOKKOS_COMPILER_MSVC)) +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) __builtin_unreachable(); #endif } diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index d3946c149b..eaf616c5d6 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -18,6 +18,8 @@ LINK ?= $(CXX) LDFLAGS ?= override LDFLAGS += -lpthread +KOKKOS_USE_DEPRECATED_MAKEFILES=1 + include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index 6960b912d0..ed9c2610b6 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ 
-281,7 +281,7 @@ struct test_random_scalar { double covariance_eps = result.covariance / num_draws / 2 / variance_expect; #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT - if (!std::is_same::value) { + if (!std::is_same_v) { #endif EXPECT_LT(std::abs(mean_eps), tolerance); EXPECT_LT(std::abs(variance_eps), 1.5 * tolerance); @@ -312,7 +312,7 @@ struct test_random_scalar { (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT - if (std::is_same::value) { + if (std::is_same_v) { mean_eps_expect = 0.0003; variance_eps_expect = 1.0; covariance_eps_expect = 5.0e4; @@ -320,7 +320,7 @@ struct test_random_scalar { #endif #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT - if (!std::is_same::value) { + if (!std::is_same_v) { #endif EXPECT_LT(std::abs(mean_eps), mean_eps_expect); EXPECT_LT(std::abs(variance_eps), variance_eps_expect); @@ -358,13 +358,13 @@ struct test_random_scalar { (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT - if (std::is_same::value) { + if (std::is_same_v) { variance_factor = 7; } #endif #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT - if (!std::is_same::value) { + if (!std::is_same_v) { #endif EXPECT_LT(std::abs(mean_eps), tolerance); EXPECT_LT(std::abs(variance_eps), variance_factor); diff --git a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp index 5ab348cb19..65e45ebb96 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -37,12 +37,18 @@ struct random_access_iterator_test : std_algorithms_test { TEST_F(random_access_iterator_test, constructor) { // just tests that constructor works - auto it1 = KE::Impl::RandomAccessIterator(m_static_view); - auto it2 = 
KE::Impl::RandomAccessIterator(m_dynamic_view); - auto it3 = KE::Impl::RandomAccessIterator(m_strided_view); - auto it4 = KE::Impl::RandomAccessIterator(m_static_view, 3); - auto it5 = KE::Impl::RandomAccessIterator(m_dynamic_view, 3); - auto it6 = KE::Impl::RandomAccessIterator(m_strided_view, 3); + [[maybe_unused]] auto it1 = + KE::Impl::RandomAccessIterator(m_static_view); + [[maybe_unused]] auto it2 = + KE::Impl::RandomAccessIterator(m_dynamic_view); + [[maybe_unused]] auto it3 = + KE::Impl::RandomAccessIterator(m_strided_view); + [[maybe_unused]] auto it4 = + KE::Impl::RandomAccessIterator(m_static_view, 3); + [[maybe_unused]] auto it5 = + KE::Impl::RandomAccessIterator(m_dynamic_view, 3); + [[maybe_unused]] auto it6 = + KE::Impl::RandomAccessIterator(m_strided_view, 3); EXPECT_TRUE(true); } diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 5ea88ae5d6..562ff97e42 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -99,6 +99,7 @@ void test_dynamic_view_sort_impl(unsigned int n) { Kokkos::Experimental::DynamicView; using KeyViewType = Kokkos::View; + // NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result) const size_t upper_bound = 2 * n; const size_t min_chunk_size = 1024; diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp index dadce2d474..d8a68f768a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp @@ -198,9 +198,8 @@ auto create_deep_copyable_compatible_view_with_same_extent(ViewType view) { // this is needed for intel to avoid // error #1011: missing return statement at end of non-void function -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ - !defined(KOKKOS_COMPILER_MSVC)) +#if 
defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) __builtin_unreachable(); #endif } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp index 9324db12f2..ddb7dc2a68 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp @@ -507,6 +507,20 @@ struct TestStruct { } }; +#ifndef KOKKOS_ENABLE_CXX17 +template +constexpr bool +test_kokkos_iterator_satify_std_random_access_iterator_concept() { + return std::random_access_iterator< + Kokkos::Experimental::Impl::RandomAccessIterator>; +} + +static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept< + Kokkos::View>()); +static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept< + Kokkos::View>()); +#endif + } // namespace compileonly } // namespace stdalgos } // namespace Test diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index 923ea970f9..67d21dd740 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -173,6 +173,7 @@ TEST(std_algorithms_DeathTest, expect_no_overlap) { KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1); + // NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result) Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3}; Kokkos::View strided_view_2d{ "std-algo-test-2d-contiguous-view-strided", layout2d}; diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp index a85e63fe34..1a81991c35 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp @@ -171,7 +171,7 @@ struct VerifyData { create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc); if (test_view_h.extent(0) > 0) { for (std::size_t i = 0; i < test_view_h.extent(0); ++i) { - if (std::is_same::value) { + if (std::is_same_v) { ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp index b4f40b4651..c8ecc137e2 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp @@ -184,7 +184,7 @@ struct VerifyData { const auto ext = test_view_h.extent(0); if (ext > 0) { for (std::size_t i = 0; i < ext; ++i) { - if (std::is_same::value) { + if (std::is_same_v) { ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp index 8327bfe13c..9e30630f07 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp @@ -153,12 +153,13 @@ void run_single_scenario(const InfoType& scenario_info) { #if !defined KOKKOS_ENABLE_OPENMPTARGET CustomLessThanComparator comp; - auto r5 = + [[maybe_unused]] auto r5 = KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp); - auto r6 = KE::is_sorted_until("label", exespace(), KE::cbegin(view), - KE::cend(view), comp); - auto r7 = KE::is_sorted_until(exespace(), view, comp); - auto r8 = KE::is_sorted_until("label", exespace(), view, comp); + [[maybe_unused]] auto r6 = KE::is_sorted_until( + "label", exespace(), KE::cbegin(view), KE::cend(view), comp); + [[maybe_unused]] auto r7 = KE::is_sorted_until(exespace(), view, comp); + [[maybe_unused]] 
auto r8 = + KE::is_sorted_until("label", exespace(), view, comp); #endif ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{}); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp index 6918185bc0..1fbeab3d9d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp @@ -53,13 +53,13 @@ TEST(std_algorithms_mod_ops_test, move) { // move constr MyMovableType b(std::move(a)); ASSERT_EQ(b.m_value, 11); - ASSERT_EQ(a.m_value, -2); + ASSERT_EQ(a.m_value, -2); // NOLINT(bugprone-use-after-move) // move assign MyMovableType c; c = std::move(b); ASSERT_EQ(c.m_value, 11); - ASSERT_EQ(b.m_value, -4); + ASSERT_EQ(b.m_value, -4); // NOLINT(bugprone-use-after-move) } template @@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove { void operator()(const int index) const { typename ViewType::value_type a{11}; using move_t = decltype(std::move(a)); - static_assert(std::is_rvalue_reference::value); + static_assert(std::is_rvalue_reference_v); m_view(index) = std::move(a); } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp index 0933c4e135..a3d7df533b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp @@ -243,16 +243,15 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, ViewType2 second_view, ValueType init_value, ValueType result_value, - Args&&... args) { + Args const&... 
args) { // trivial cases const auto r1 = KE::transform_reduce( ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view), - KE::cbegin(second_view), init_value, std::forward(args)...); + KE::cbegin(second_view), init_value, args...); - const auto r2 = - KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(first_view), - KE::cbegin(first_view), KE::cbegin(second_view), - init_value, std::forward(args)...); + const auto r2 = KE::transform_reduce( + "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), + KE::cbegin(first_view), KE::cbegin(second_view), init_value, args...); ASSERT_EQ(r1, init_value); ASSERT_EQ(r2, init_value); @@ -260,18 +259,16 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, // non trivial cases const auto r3 = KE::transform_reduce( ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view), - KE::cbegin(second_view), init_value, std::forward(args)...); + KE::cbegin(second_view), init_value, args...); const auto r4 = KE::transform_reduce( "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view), - KE::cbegin(second_view), init_value, std::forward(args)...); + KE::cbegin(second_view), init_value, args...); - const auto r5 = - KE::transform_reduce(ExecutionSpace(), first_view, second_view, - init_value, std::forward(args)...); - const auto r6 = - KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, - init_value, std::forward(args)...); + const auto r5 = KE::transform_reduce(ExecutionSpace(), first_view, + second_view, init_value, args...); + const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, + second_view, init_value, args...); ASSERT_EQ(r3, result_value); ASSERT_EQ(r4, result_value); @@ -363,32 +360,30 @@ template void run_and_check_transform_reduce_overloadB(ViewType view, ValueType init_value, ValueType result_value, - Args&&... args) { + Args const&... 
args) { // trivial - const auto r1 = - KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), - init_value, std::forward(args)...); + const auto r1 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), + KE::cbegin(view), init_value, args...); - const auto r2 = KE::transform_reduce("MYLABEL", ExecutionSpace(), - KE::cbegin(view), KE::cbegin(view), - init_value, std::forward(args)...); + const auto r2 = + KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), + KE::cbegin(view), init_value, args...); ASSERT_EQ(r1, init_value); ASSERT_EQ(r2, init_value); // non trivial - const auto r3 = - KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), - init_value, std::forward(args)...); + const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), + KE::cend(view), init_value, args...); - const auto r4 = KE::transform_reduce("MYLABEL", ExecutionSpace(), - KE::cbegin(view), KE::cend(view), - init_value, std::forward(args)...); - const auto r5 = KE::transform_reduce(ExecutionSpace(), view, init_value, - std::forward(args)...); + const auto r4 = + KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), + KE::cend(view), init_value, args...); + const auto r5 = + KE::transform_reduce(ExecutionSpace(), view, init_value, args...); const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view, - init_value, std::forward(args)...); + init_value, args...); ASSERT_EQ(r3, result_value); ASSERT_EQ(r4, result_value); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp index bf5c2ee782..b9545e8b2e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp @@ -196,7 +196,7 @@ void run_single_scenario(const InfoType& scenario_info, // create host copy BEFORE rotate or view will be modified auto view_h = create_host_space_copy(view); auto 
rit = KE::rotate(exespace(), view, rotation_point); - // verify_data(rit, view, view_h, rotation_point); + verify_data(rit, view, view_h, rotation_point); } { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp index 5a2c046939..1dfdcfd568 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp @@ -191,6 +191,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_EQ(stdDistance, distancesView_h(i)); break; } + default: Kokkos::abort("unreachable"); } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp index 95f2934e01..88fc649a9b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp @@ -217,6 +217,7 @@ void test_A(const bool ensureAdjacentFindCanFind, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp index 82cce0b384..592bb4c864 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp @@ -244,6 +244,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp index 0c35c5e599..0c9f1e1bd2 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp @@ -224,6 +224,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } #endif + default: Kokkos::abort("unreachable"); } #undef exclusive_scan diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp index d350bc62cd..21a905be56 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp @@ -227,6 +227,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } if (sequencesExist) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp index e992882e91..ad1043362e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp @@ -244,6 +244,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp index 70f2be77f6..f21f947e97 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp @@ -57,14 +57,7 @@ struct TestFunctorA { const auto myRowIndex = member.league_rank(); auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL()); const auto val = m_greaterThanValuesView(myRowIndex); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - GreaterEqualFunctor< - typename GreaterThanValuesViewType::non_const_value_type> - unaryPred{val}; -#else GreaterEqualFunctor unaryPred{val}; -#endif ptrdiff_t 
resultDist = 0; switch (m_apiPick) { @@ -185,12 +178,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams, const auto rowFromBegin = KE::cbegin(rowFrom); const auto rowFromEnd = KE::cend(rowFrom); const auto val = greaterEqualValuesView_h(i); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - const GreaterEqualFunctor unaryPred{val}; -#else const GreaterEqualFunctor unaryPred{val}; -#endif auto it = std::find_if(rowFromBegin, rowFromEnd, unaryPred); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp index 873e8faf4c..0794dc0a79 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp @@ -57,14 +57,7 @@ struct TestFunctorA { const auto myRowIndex = member.league_rank(); auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL()); const auto val = m_greaterThanValuesView(myRowIndex); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - GreaterEqualFunctor< - typename GreaterThanValuesViewType::non_const_value_type> - unaryPred{val}; -#else GreaterEqualFunctor unaryPred{val}; -#endif ptrdiff_t resultDist = 0; switch (m_apiPick) { @@ -180,12 +173,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams, const auto rowFromBegin = KE::cbegin(rowFrom); const auto rowFromEnd = KE::cend(rowFrom); const auto val = greaterEqualValuesView_h(i); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - const GreaterEqualFunctor unaryPred{val}; -#else const GreaterEqualFunctor unaryPred{val}; -#endif auto it = std::find_if_not(rowFromBegin, rowFromEnd, unaryPred); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp index 
b5f4cdd612..4c77eff9c4 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp @@ -253,6 +253,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } #undef inclusive_scan diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp index c377b9fec8..9d2d2721c6 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp @@ -245,6 +245,7 @@ void test_A(const TestCaseType testCase, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp index 84269511d8..9b245508e3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp @@ -249,6 +249,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp index eb00d9e083..88264b45c0 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp @@ -242,6 +242,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } #undef reduce diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp index 
039db4095d..1f0f4b6c1b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp @@ -243,6 +243,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp index 25cd1471e0..6d8a34e842 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp @@ -258,6 +258,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp index 1c43854381..60e199a350 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp @@ -203,6 +203,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_EQ(stdDistance, distancesView_h(i)); break; } + default: Kokkos::abort("unreachable"); } #undef transform_exclusive_scan diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp index 78a21c4430..0dc3e68b1d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp @@ -240,6 +240,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } } #undef transform_inclusive_scan diff --git 
a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp index 17ded226aa..3ad0b5b354 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp @@ -293,6 +293,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } #undef transform_reduce diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp index 365ca21688..e3114daeae 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp @@ -344,8 +344,7 @@ TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan_functor) { using view_type = Kokkos::View; view_type dummy_view("dummy_view", 0); using unary_op_type = - Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor< - int>; + Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor; using functor_type = Kokkos::Experimental::Impl::TransformExclusiveScanFunctorWithValueWrapper< exespace, int, int, view_type, view_type, MultiplyFunctor, diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp index cc87262147..2dda12e22d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp @@ -390,8 +390,7 @@ TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan_functor) { int dummy = 0; using view_type = Kokkos::View; view_type dummy_view("dummy_view", 0); - using unary_op_type = - 
KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor; { using functor_type = KE::Impl::ExeSpaceTransformInclusiveScanNoInitValueFunctor< diff --git a/lib/kokkos/benchmarks/atomic/Makefile b/lib/kokkos/benchmarks/atomic/Makefile index 636c0ad4ab..c59de75ce8 100644 --- a/lib/kokkos/benchmarks/atomic/Makefile +++ b/lib/kokkos/benchmarks/atomic/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile index 1aa4edddcd..4b6f084d20 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile +++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/gather/Makefile b/lib/kokkos/benchmarks/gather/Makefile index 6827995bed..e1bfce21a6 100644 --- a/lib/kokkos/benchmarks/gather/Makefile +++ b/lib/kokkos/benchmarks/gather/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp b/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp index 156c29af09..0935706ee8 100644 --- a/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp +++ b/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp @@ -37,7 +37,7 @@ template struct TestFunctor { - double values[V]; + double values[V] = {}; Kokkos::View a; int K; TestFunctor(Kokkos::View a_, int K_) : a(a_), K(K_) {} @@ -50,7 +50,7 
@@ struct TestFunctor { template struct TestRFunctor { - double values[V]; + double values[V] = {}; Kokkos::View a; int K; TestRFunctor(Kokkos::View a_, int K_) : a(a_), K(K_) {} @@ -247,12 +247,15 @@ int main(int argc, char* argv[]) { // anything that doesn't start with -- if (arg.size() < 2 || (arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) { + // signing off that arg.data() is null terminated + // NOLINTBEGIN(bugprone-suspicious-stringview-data-usage) if (i == 1) N = atoi(arg.data()); else if (i == 2) M = atoi(arg.data()); else if (i == 3) K = atoi(arg.data()); + // NOLINTEND(bugprone-suspicious-stringview-data-usage) else { Kokkos::abort("unexpected argument!"); } diff --git a/lib/kokkos/benchmarks/policy_performance/Makefile b/lib/kokkos/benchmarks/policy_performance/Makefile index f50aea720e..21365f36c6 100644 --- a/lib/kokkos/benchmarks/policy_performance/Makefile +++ b/lib/kokkos/benchmarks/policy_performance/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/policy_performance/main.cpp b/lib/kokkos/benchmarks/policy_performance/main.cpp index 0983a3d535..dd61ba6502 100644 --- a/lib/kokkos/benchmarks/policy_performance/main.cpp +++ b/lib/kokkos/benchmarks/policy_performance/main.cpp @@ -120,11 +120,12 @@ int main(int argc, char* argv[]) { // view appropriately for test and should obey first-touch etc Second call to // test is the one we actually care about and time view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"), - team_range * team_size); + static_cast(team_range) * team_size); view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"), - team_range * team_size, thread_range); + static_cast(team_range) * team_size, thread_range); view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"), - team_range * 
team_size, thread_range, vector_range); + static_cast(team_range) * team_size, thread_range, + vector_range); double result_computed = 0.0; double result_expect = 0.0; diff --git a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp index 0e23d221f6..8a874e0139 100644 --- a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp +++ b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp @@ -367,7 +367,7 @@ void test_policy(int team_range, int thread_range, int vector_range, // parallel_for RangePolicy: range = team_size*team_range if (test_type == 300) { Kokkos::parallel_for( - "300 outer for", team_size * team_range, + "300 outer for", static_cast(team_size) * team_range, KOKKOS_LAMBDA(const int idx) { v1(idx) = idx; // prevent compiler from optimizing away the loop @@ -376,14 +376,15 @@ void test_policy(int team_range, int thread_range, int vector_range, // parallel_reduce RangePolicy: range = team_size*team_range if (test_type == 400) { Kokkos::parallel_reduce( - "400 outer reduce", team_size * team_range, + "400 outer reduce", static_cast(team_size) * team_range, KOKKOS_LAMBDA(const int idx, double& val) { val += idx; }, result); result_expect = 0.5 * (team_size * team_range) * (team_size * team_range - 1); } // parallel_scan RangePolicy: range = team_size*team_range if (test_type == 500) { - Kokkos::parallel_scan("500 outer scan", team_size * team_range, + Kokkos::parallel_scan("500 outer scan", + static_cast(team_size) * team_range, ParallelScanFunctor(v1) #if 0 // This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation diff --git a/lib/kokkos/benchmarks/stream/Makefile b/lib/kokkos/benchmarks/stream/Makefile index 47a13838a4..529e789247 100644 --- a/lib/kokkos/benchmarks/stream/Makefile +++ b/lib/kokkos/benchmarks/stream/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" 
+KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/view_copy_constructor/Makefile b/lib/kokkos/benchmarks/view_copy_constructor/Makefile index 70c6d517e0..77845a22b1 100644 --- a/lib/kokkos/benchmarks/view_copy_constructor/Makefile +++ b/lib/kokkos/benchmarks/view_copy_constructor/Makefile @@ -1,6 +1,7 @@ KOKKOS_DEVICES=Serial KOKKOS_ARCH = "" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index d58645f98a..8d3dbf1c75 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -317,7 +317,7 @@ do # End of Werror handling #Handle unsupported standard flags --std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a) - fallback_std_flag="-std=c++14" + fallback_std_flag="-std=c++17" # this is hopefully just occurring in a downstream project during CMake feature tests # we really have no choice here but to accept the flag and change to an accepted C++ standard echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." 
@@ -346,35 +346,17 @@ do # NVCC only has C++20 from version 12 on cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) if [ ${cuda_main_version} -lt 12 ]; then - fallback_std_flag="-std=c++14" + fallback_std_flag="-std=c++17" # this is hopefully just occurring in a downstream project during CMake feature tests # we really have no choice here but to accept the flag and change to an accepted C++ standard - echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." + echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." std_flag=$fallback_std_flag else std_flag=$1 fi shared_args="$shared_args $std_flag" ;; - --std=c++17|-std=c++17) - if [ -n "$std_flag" ]; then - warn_std_flag - shared_args=${shared_args/ $std_flag/} - fi - # NVCC only has C++17 from version 11 on - cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) - if [ ${cuda_main_version} -lt 11 ]; then - fallback_std_flag="-std=c++14" - # this is hopefully just occurring in a downstream project during CMake feature tests - # we really have no choice here but to accept the flag and change to an accepted C++ standard - echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." 
- std_flag=$fallback_std_flag - else - std_flag=$1 - fi - shared_args="$shared_args $std_flag" - ;; - --std=c++11|-std=c++11|--std=c++14|-std=c++14) + --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++17|-std=c++17) if [ -n "$std_flag" ]; then warn_std_flag shared_args=${shared_args/ $std_flag/} @@ -500,6 +482,20 @@ do xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" host_linker_args="$host_linker_args ${1:4:${#1}}" ;; + #Handle host assembler options + -Wa,*) + #To pass the -Wa options to the host compiler via -Xcompiler it is necessary + #to use '\\,' for each comma in the options. As users might already add escapes + #to the comma by themselves, the escapes are first removed and then only the + #required number of \ are added back. + xcompiler_args_wa=$(echo -e "$1" | sed -E 's/\\\+,/,/g' | sed -E 's/,/\\\\\\\,/g') + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$xcompiler_args_wa" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$xcompiler_args_wa" + fi + ;; #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking *.a|*.so|*.o|*.obj) object_files="$object_files $1" diff --git a/lib/kokkos/cmake/KokkosConfig.cmake.in b/lib/kokkos/cmake/KokkosConfig.cmake.in index 1b6d1b66ff..aed9f1060c 100644 --- a/lib/kokkos/cmake/KokkosConfig.cmake.in +++ b/lib/kokkos/cmake/KokkosConfig.cmake.in @@ -2,65 +2,71 @@ # loaded by include() and find_package() commands except when invoked with # the NO_POLICY_SCOPE option # CMP0057 + NEW -> IN_LIST operator in IF(...) 
-CMAKE_POLICY(SET CMP0057 NEW) +cmake_policy(SET CMP0057 NEW) # Compute paths @PACKAGE_INIT@ #Find dependencies -INCLUDE(CMakeFindDependencyMacro) +include(CMakeFindDependencyMacro) #This needs to go above the KokkosTargets in case #the Kokkos targets depend in some way on the TPL imports @KOKKOS_TPL_EXPORTS@ -GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") -INCLUDE("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake") -UNSET(Kokkos_CMAKE_DIR) +get_filename_component(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +include("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") +include("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake") +unset(Kokkos_CMAKE_DIR) # check for conflicts -IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND - "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) - MESSAGE(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.") - MESSAGE(STATUS "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'") - MESSAGE(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'") -ENDIF() +if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) + message(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.") + message( + STATUS + "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'" + ) + message(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'") +endif() -IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS) - # - # if find_package(Kokkos COMPONENTS launch_compiler) then rely on the - # RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the - # appropriate compiler for Kokkos - # 
+if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS) + # + # if find_package(Kokkos COMPONENTS launch_compiler) then rely on the + # RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the + # appropriate compiler for Kokkos + # - MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") - kokkos_compilation( - GLOBAL - CHECK_CUDA_COMPILES) + message( + STATUS + "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos" + ) + kokkos_compilation(GLOBAL CHECK_CUDA_COMPILES) -ELSEIF(@Kokkos_ENABLE_CUDA@ - AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA - AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) - # - # if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not - # specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and - # kokkos_launch_compiler will re-direct to the compiler used to compile CUDA code during installation. - # kokkos_launch_compiler will re-direct if ${CMAKE_CXX_COMPILER} and -DKOKKOS_DEPENDENCE is present, - # otherwise, the original command will be executed - # +elseif(@Kokkos_ENABLE_CUDA@ AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA AND NOT "separable_compilation" IN_LIST + Kokkos_FIND_COMPONENTS +) + # + # if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not + # specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and + # kokkos_launch_compiler will re-direct to the compiler used to compile CUDA code during installation. 
+ # kokkos_launch_compiler will re-direct if ${CMAKE_CXX_COMPILER} and -DKOKKOS_DEPENDENCE is present, + # otherwise, the original command will be executed + # - # run test to see if CMAKE_CXX_COMPILER=nvcc_wrapper - kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER}) + # run test to see if CMAKE_CXX_COMPILER=nvcc_wrapper + kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER}) - # if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF - IF(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER)) - MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") - kokkos_compilation(GLOBAL) - ENDIF() + # if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF + if(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER)) + message( + STATUS + "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos" + ) + kokkos_compilation(GLOBAL) + endif() - # be mindful of the environment, pollution is bad - UNSET(IS_NVCC) -ENDIF() + # be mindful of the environment, pollution is bad + unset(IS_NVCC) +endif() set(Kokkos_COMPILE_LANGUAGE @KOKKOS_COMPILE_LANGUAGE@) diff --git a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in index d3ac39ffa3..769dff6b10 100644 --- a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in +++ b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in @@ -1,67 +1,67 @@ -SET(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@) -SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@) -SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) -SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) -SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") -SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") -SET(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@") -SET(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) 
+set(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@) +set(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@) +set(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) +set(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) +set(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") +set(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") +set(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@") +set(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) # Required to be a TriBITS-compliant external package -IF(NOT TARGET Kokkos::all_libs) +if(NOT TARGET Kokkos::all_libs) # CMake Error at /lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY): # ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target # "Kokkos::kokkos" is imported but not globally visible. - IF(CMAKE_VERSION VERSION_LESS "3.18") - SET_TARGET_PROPERTIES(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON) - ENDIF() - ADD_LIBRARY(Kokkos::all_libs ALIAS Kokkos::kokkos) -ENDIF() + if(CMAKE_VERSION VERSION_LESS "3.18") + set_target_properties(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON) + endif() + add_library(Kokkos::all_libs ALIAS Kokkos::kokkos) +endif() # Export Kokkos_ENABLE_ for each backend that was enabled. # NOTE: "Devices" is a little bit of a misnomer here. These are really # backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP, # or Kokkos_ENABLE_SYCL. -FOREACH(DEV ${Kokkos_DEVICES}) - SET(Kokkos_ENABLE_${DEV} ON) -ENDFOREACH() +foreach(DEV ${Kokkos_DEVICES}) + set(Kokkos_ENABLE_${DEV} ON) +endforeach() # Export relevant Kokkos_ENABLE