From 39786b17407dd03a501f8582df53bd56a25d16f7 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Mar 2018 10:57:08 -0700 Subject: [PATCH] Update Kokkos library to r2.6.00 --- lib/kokkos/CHANGELOG.md | 44 + lib/kokkos/CMakeLists.txt | 14 +- lib/kokkos/Copyright.txt | 2 +- lib/kokkos/HOW_TO_SNAPSHOT | 2 +- lib/kokkos/LICENSE | 2 +- lib/kokkos/Makefile.kokkos | 116 +- lib/kokkos/README | 199 +- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 150 +- lib/kokkos/algorithms/src/Kokkos_Sort.hpp | 48 +- lib/kokkos/algorithms/unit_tests/TestCuda.cpp | 7 +- .../algorithms/unit_tests/TestOpenMP.cpp | 17 +- lib/kokkos/algorithms/unit_tests/TestROCm.cpp | 6 +- .../algorithms/unit_tests/TestRandom.hpp | 2 +- .../algorithms/unit_tests/TestSerial.cpp | 5 +- lib/kokkos/algorithms/unit_tests/TestSort.hpp | 104 +- .../algorithms/unit_tests/TestThreads.cpp | 17 +- .../algorithms/unit_tests/UnitTestMain.cpp | 8 +- lib/kokkos/benchmarks/atomic/Makefile | 2 +- .../benchmark_suite/scripts/run_tests.bash | 4 +- .../benchmarks/bytes_and_flops/bench.hpp | 2 +- .../bytes_and_flops/bench_stride.hpp | 2 +- .../bytes_and_flops/bench_unroll_stride.hpp | 2 +- .../benchmarks/bytes_and_flops/main.cpp | 2 +- lib/kokkos/benchmarks/gather/gather.hpp | 2 +- .../benchmarks/gather/gather_unroll.hpp | 2 +- lib/kokkos/benchmarks/gather/main.cpp | 2 +- .../benchmarks/policy_performance/main.cpp | 2 +- .../policy_performance/policy_perf_test.hpp | 2 +- lib/kokkos/cmake/Modules/FindHWLOC.cmake | 6 +- lib/kokkos/cmake/kokkos_build.cmake | 36 +- lib/kokkos/cmake/kokkos_options.cmake | 6 +- lib/kokkos/cmake/kokkos_settings.cmake | 87 +- lib/kokkos/cmake/tribits.cmake | 7 +- lib/kokkos/config/configure_compton_cpu.sh | 190 -- lib/kokkos/config/configure_compton_mic.sh | 186 -- lib/kokkos/config/configure_kokkos.sh | 293 -- lib/kokkos/config/configure_kokkos_bgq.sh | 88 - lib/kokkos/config/configure_kokkos_dev.sh | 216 -- lib/kokkos/config/configure_kokkos_nvidia.sh | 204 -- lib/kokkos/config/configure_shannon.sh 
| 190 -- ...nfigure_tpetra_kokkos_cuda_nvcc_wrapper.sh | 140 - .../kokkos-trilinos-integration-procedure.txt | 148 - .../config/kokkos_dev/config-core-all.sh | 110 - .../kokkos_dev/config-core-cuda-omp-hwloc.sh | 104 - .../config/kokkos_dev/config-core-cuda.sh | 88 - .../kokkos_dev/config-core-cxx11-omp.sh | 84 - .../config/kokkos_dev/config-core-dbg-none.sh | 78 - .../kokkos_dev/config-core-intel-cuda-omp.sh | 89 - .../kokkos_dev/config-core-intel-omp.sh | 84 - .../config/kokkos_dev/config-core-omp.sh | 77 - .../kokkos_dev/config-core-threads-hwloc.sh | 87 - lib/kokkos/config/nvcc_wrapper | 340 -- lib/kokkos/config/test_all_sandia | 106 +- .../containers/performance_tests/TestCuda.cpp | 2 +- .../performance_tests/TestDynRankView.hpp | 26 +- .../performance_tests/TestGlobal2LocalIds.hpp | 8 +- .../containers/performance_tests/TestMain.cpp | 2 +- .../performance_tests/TestOpenMP.cpp | 2 +- .../containers/performance_tests/TestROCm.cpp | 2 +- .../performance_tests/TestScatterView.hpp | 2 +- .../performance_tests/TestThreads.cpp | 2 +- .../TestUnorderedMapPerformance.hpp | 2 +- lib/kokkos/containers/src/Kokkos_Bitset.hpp | 22 +- lib/kokkos/containers/src/Kokkos_DualView.hpp | 60 +- .../containers/src/Kokkos_DynRankView.hpp | 333 +- .../containers/src/Kokkos_DynamicView.hpp | 387 ++- .../containers/src/Kokkos_ErrorReporter.hpp | 10 +- .../containers/src/Kokkos_Functional.hpp | 2 +- .../containers/src/Kokkos_ScatterView.hpp | 11 +- .../containers/src/Kokkos_StaticCrsGraph.hpp | 12 +- .../containers/src/Kokkos_UnorderedMap.hpp | 36 +- lib/kokkos/containers/src/Kokkos_Vector.hpp | 2 +- .../src/impl/Kokkos_Bitset_impl.hpp | 4 +- .../src/impl/Kokkos_Functional_impl.hpp | 2 +- .../impl/Kokkos_StaticCrsGraph_factory.hpp | 2 +- .../src/impl/Kokkos_UnorderedMap_impl.cpp | 2 +- .../src/impl/Kokkos_UnorderedMap_impl.hpp | 14 +- .../containers/unit_tests/TestBitset.hpp | 2 +- lib/kokkos/containers/unit_tests/TestCuda.cpp | 6 +- .../containers/unit_tests/TestDualView.hpp | 8 +- 
.../containers/unit_tests/TestDynViewAPI.hpp | 565 ++-- .../containers/unit_tests/TestDynamicView.hpp | 241 +- .../unit_tests/TestErrorReporter.hpp | 2 +- .../containers/unit_tests/TestOpenMP.cpp | 5 +- lib/kokkos/containers/unit_tests/TestROCm.cpp | 2 +- .../containers/unit_tests/TestScatterView.hpp | 4 +- .../containers/unit_tests/TestSerial.cpp | 4 +- .../unit_tests/TestStaticCrsGraph.hpp | 24 +- .../containers/unit_tests/TestThreads.cpp | 17 +- .../unit_tests/TestUnorderedMap.hpp | 2 +- .../containers/unit_tests/TestVector.hpp | 2 +- .../TestViewCtorPropEmbeddedDim.hpp | 2 +- .../containers/unit_tests/UnitTestMain.cpp | 9 +- lib/kokkos/core/perf_test/Makefile | 1 + .../core/perf_test/PerfTestBlasKernels.hpp | 10 +- lib/kokkos/core/perf_test/PerfTestDriver.hpp | 2 +- .../core/perf_test/PerfTestGramSchmidt.cpp | 8 +- lib/kokkos/core/perf_test/PerfTestHexGrad.cpp | 2 +- lib/kokkos/core/perf_test/PerfTestMDRange.hpp | 2 +- lib/kokkos/core/perf_test/PerfTestMain.cpp | 2 +- .../core/perf_test/PerfTest_Category.hpp | 2 +- .../perf_test/PerfTest_CustomReduction.cpp | 2 +- .../core/perf_test/PerfTest_ViewCopy.cpp | 445 +++ lib/kokkos/core/perf_test/test_atomic.cpp | 2 +- lib/kokkos/core/perf_test/test_mempool.cpp | 2 +- lib/kokkos/core/perf_test/test_taskdag.cpp | 2 +- .../src/Cuda/KokkosExp_Cuda_IterateTile.hpp | 2 +- .../KokkosExp_Cuda_IterateTile_Refactor.hpp | 2 +- lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp | 2 +- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 8 +- .../core/src/Cuda/Kokkos_Cuda_Alloc.hpp | 2 +- .../core/src/Cuda/Kokkos_Cuda_Error.hpp | 2 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp | 29 +- .../core/src/Cuda/Kokkos_Cuda_Internal.hpp | 2 +- .../core/src/Cuda/Kokkos_Cuda_Locks.cpp | 2 +- .../core/src/Cuda/Kokkos_Cuda_Locks.hpp | 19 +- .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 158 +- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 35 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp | 88 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp | 300 +- 
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 18 +- .../core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp | 2 +- .../src/Cuda/Kokkos_Cuda_Vectorization.hpp | 2 +- .../Kokkos_Cuda_Version_9_8_Compatibility.hpp | 58 + lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 10 +- .../src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp | 2 +- .../core/src/Cuda/Kokkos_Cuda_abort.hpp | 2 +- .../core/src/KokkosExp_MDRangePolicy.hpp | 98 +- lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp | 127 + lib/kokkos/core/src/Kokkos_Array.hpp | 2 +- lib/kokkos/core/src/Kokkos_Atomic.hpp | 15 +- lib/kokkos/core/src/Kokkos_Complex.hpp | 4 +- lib/kokkos/core/src/Kokkos_Concepts.hpp | 2 +- lib/kokkos/core/src/Kokkos_CopyViews.hpp | 1720 +++++++++++ lib/kokkos/core/src/Kokkos_Core.hpp | 27 +- lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 9 +- lib/kokkos/core/src/Kokkos_Crs.hpp | 10 +- lib/kokkos/core/src/Kokkos_Cuda.hpp | 2 +- lib/kokkos/core/src/Kokkos_CudaSpace.hpp | 2 +- lib/kokkos/core/src/Kokkos_ExecPolicy.hpp | 234 +- lib/kokkos/core/src/Kokkos_HBWSpace.hpp | 2 +- lib/kokkos/core/src/Kokkos_HostSpace.hpp | 2 +- lib/kokkos/core/src/Kokkos_Layout.hpp | 2 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 15 +- lib/kokkos/core/src/Kokkos_MasterLock.hpp | 2 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 12 +- lib/kokkos/core/src/Kokkos_MemoryTraits.hpp | 2 +- lib/kokkos/core/src/Kokkos_NumericTraits.hpp | 2 +- lib/kokkos/core/src/Kokkos_OpenMP.hpp | 4 +- lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp | 2 +- .../core/src/Kokkos_OpenMPTargetSpace.hpp | 2 +- lib/kokkos/core/src/Kokkos_Pair.hpp | 2 +- lib/kokkos/core/src/Kokkos_Parallel.hpp | 2 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 237 +- .../src/Kokkos_Profiling_ProfileSection.hpp | 2 +- lib/kokkos/core/src/Kokkos_Qthreads.hpp | 2 +- lib/kokkos/core/src/Kokkos_ROCm.hpp | 12 +- lib/kokkos/core/src/Kokkos_ROCmSpace.hpp | 2 +- lib/kokkos/core/src/Kokkos_ScratchSpace.hpp | 2 +- lib/kokkos/core/src/Kokkos_Serial.hpp | 41 +- lib/kokkos/core/src/Kokkos_TaskPolicy.hpp | 2 +- 
lib/kokkos/core/src/Kokkos_TaskScheduler.hpp | 14 +- lib/kokkos/core/src/Kokkos_Threads.hpp | 2 +- lib/kokkos/core/src/Kokkos_Timer.hpp | 2 +- lib/kokkos/core/src/Kokkos_UniqueToken.hpp | 2 +- lib/kokkos/core/src/Kokkos_Vectorization.hpp | 2 +- lib/kokkos/core/src/Kokkos_View.hpp | 1674 +++++----- .../core/src/Kokkos_WorkGraphPolicy.hpp | 4 +- lib/kokkos/core/src/Kokkos_hwloc.hpp | 2 +- .../core/src/Makefile.generate_build_files | 23 +- .../core/src/OpenMP/Kokkos_OpenMP_Exec.cpp | 7 +- .../core/src/OpenMP/Kokkos_OpenMP_Exec.hpp | 8 +- .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 15 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 2 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 2 +- .../core/src/OpenMP/Kokkos_OpenMP_Team.hpp | 28 +- .../OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp | 2 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 2 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp | 2 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp | 28 +- .../Kokkos_OpenMPTarget_Parallel.hpp | 2 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp | 2 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp | 2 +- .../core/src/Qthreads/Kokkos_QthreadsExec.cpp | 2 +- .../core/src/Qthreads/Kokkos_QthreadsExec.hpp | 2 +- .../src/Qthreads/Kokkos_Qthreads_Parallel.hpp | 2 +- .../src/Qthreads/Kokkos_Qthreads_Task.cpp | 2 +- .../src/Qthreads/Kokkos_Qthreads_Task.hpp | 2 +- .../Qthreads/Kokkos_Qthreads_TaskQueue.hpp | 2 +- .../Kokkos_Qthreads_TaskQueue_impl.hpp | 2 +- .../KokkosExp_ROCm_IterateTile_Refactor.hpp | 2750 +++++++++++++++++ .../core/src/ROCm/Kokkos_ROCm_Atomic.hpp | 42 +- .../core/src/ROCm/Kokkos_ROCm_Config.hpp | 2 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.cpp | 2 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp | 125 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Impl.cpp | 10 +- .../core/src/ROCm/Kokkos_ROCm_Invoke.hpp | 2 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Join.hpp | 2 +- .../core/src/ROCm/Kokkos_ROCm_Parallel.hpp | 129 +- .../core/src/ROCm/Kokkos_ROCm_Reduce.hpp | 2 +- 
.../core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp | 2 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp | 2 +- .../core/src/ROCm/Kokkos_ROCm_Space.cpp | 2 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.cpp | 2 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Task.hpp | 2 +- lib/kokkos/core/src/ROCm/Kokkos_ROCm_Tile.hpp | 2 +- .../src/ROCm/Kokkos_ROCm_Vectorization.hpp | 2 +- .../core/src/Threads/Kokkos_ThreadsExec.cpp | 6 +- .../core/src/Threads/Kokkos_ThreadsExec.hpp | 2 +- .../src/Threads/Kokkos_ThreadsExec_base.cpp | 2 +- .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 28 +- .../src/Threads/Kokkos_Threads_Parallel.hpp | 7 +- .../Kokkos_Threads_WorkGraphPolicy.hpp | 2 +- .../src/impl/KokkosExp_Host_IterateTile.hpp | 2 +- .../core/src/impl/KokkosExp_ViewMapping.hpp | 2 +- .../core/src/impl/Kokkos_AnalyzePolicy.hpp | 2 +- .../core/src/impl/Kokkos_Atomic_Assembly.hpp | 2 +- .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 14 +- .../core/src/impl/Kokkos_Atomic_Decrement.hpp | 33 +- .../core/src/impl/Kokkos_Atomic_Exchange.hpp | 22 +- .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 13 +- .../core/src/impl/Kokkos_Atomic_Fetch_And.hpp | 13 +- .../core/src/impl/Kokkos_Atomic_Fetch_Or.hpp | 13 +- .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 13 +- .../core/src/impl/Kokkos_Atomic_Generic.hpp | 6 +- .../core/src/impl/Kokkos_Atomic_Increment.hpp | 30 +- .../core/src/impl/Kokkos_Atomic_View.hpp | 14 +- .../core/src/impl/Kokkos_Atomic_Windows.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_BitOps.hpp | 6 +- .../core/src/impl/Kokkos_CPUDiscovery.cpp | 2 +- .../core/src/impl/Kokkos_CPUDiscovery.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp | 2 +- .../core/src/impl/Kokkos_ConcurrentBitset.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_Core.cpp | 36 +- lib/kokkos/core/src/impl/Kokkos_Error.cpp | 2 +- lib/kokkos/core/src/impl/Kokkos_Error.hpp | 2 +- .../core/src/impl/Kokkos_ExecPolicy.cpp | 2 +- .../core/src/impl/Kokkos_FunctorAdapter.hpp | 584 +++- .../core/src/impl/Kokkos_FunctorAnalysis.hpp | 87 
+- lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp | 2 +- .../core/src/impl/Kokkos_HostBarrier.cpp | 16 +- .../core/src/impl/Kokkos_HostBarrier.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 2 +- .../core/src/impl/Kokkos_HostThreadTeam.cpp | 2 +- .../core/src/impl/Kokkos_HostThreadTeam.hpp | 15 +- .../core/src/impl/Kokkos_MemoryPool.cpp | 2 +- .../core/src/impl/Kokkos_Memory_Fence.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp | 2 +- .../core/src/impl/Kokkos_PhysicalLayout.hpp | 6 +- .../src/impl/Kokkos_Profiling_DeviceInfo.hpp | 2 +- .../src/impl/Kokkos_Profiling_Interface.cpp | 46 +- .../src/impl/Kokkos_Profiling_Interface.hpp | 58 +- lib/kokkos/core/src/impl/Kokkos_Serial.cpp | 2 +- .../core/src/impl/Kokkos_Serial_Task.cpp | 2 +- .../core/src/impl/Kokkos_Serial_Task.hpp | 2 +- .../impl/Kokkos_Serial_WorkGraphPolicy.hpp | 2 +- .../core/src/impl/Kokkos_SharedAlloc.cpp | 23 +- .../core/src/impl/Kokkos_SharedAlloc.hpp | 43 +- lib/kokkos/core/src/impl/Kokkos_Spinwait.cpp | 85 +- lib/kokkos/core/src/impl/Kokkos_Spinwait.hpp | 52 +- .../core/src/impl/Kokkos_StaticAssert.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_Tags.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp | 19 +- .../core/src/impl/Kokkos_TaskQueue_impl.hpp | 4 +- lib/kokkos/core/src/impl/Kokkos_Timer.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_Utilities.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp | 2 +- .../core/src/impl/Kokkos_ViewMapping.hpp | 40 +- lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp | 2 +- .../core/src/impl/Kokkos_Volatile_Load.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_hwloc.cpp | 2 +- lib/kokkos/core/unit_test/CMakeLists.txt | 49 + lib/kokkos/core/unit_test/Makefile | 56 +- lib/kokkos/core/unit_test/TestAggregate.hpp | 2 +- lib/kokkos/core/unit_test/TestAtomic.hpp | 2 +- .../core/unit_test/TestAtomicOperations.hpp | 2 +- 
lib/kokkos/core/unit_test/TestAtomicViews.hpp | 33 +- lib/kokkos/core/unit_test/TestCXX11.hpp | 2 +- .../core/unit_test/TestCXX11Deduction.hpp | 2 +- .../core/unit_test/TestCompilerMacros.hpp | 4 +- lib/kokkos/core/unit_test/TestComplex.hpp | 2 +- .../core/unit_test/TestConcurrentBitset.hpp | 2 +- lib/kokkos/core/unit_test/TestCrs.hpp | 2 +- .../unit_test/TestDefaultDeviceTypeInit.hpp | 2 +- .../core/unit_test/TestFunctorAnalysis.hpp | 2 +- lib/kokkos/core/unit_test/TestHWLOC.cpp | 2 +- lib/kokkos/core/unit_test/TestInit.hpp | 2 +- lib/kokkos/core/unit_test/TestMDRange.hpp | 502 ++- lib/kokkos/core/unit_test/TestMemoryPool.hpp | 2 +- .../core/unit_test/TestPolicyConstruction.hpp | 159 +- lib/kokkos/core/unit_test/TestRange.hpp | 2 +- lib/kokkos/core/unit_test/TestReduce.hpp | 6 +- .../unit_test/TestReduceCombinatorical.hpp | 2 +- lib/kokkos/core/unit_test/TestResize.hpp | 2 +- lib/kokkos/core/unit_test/TestScan.hpp | 2 +- lib/kokkos/core/unit_test/TestSharedAlloc.hpp | 2 +- .../core/unit_test/TestTaskScheduler.hpp | 2 +- lib/kokkos/core/unit_test/TestTeam.hpp | 49 +- lib/kokkos/core/unit_test/TestTeamVector.hpp | 9 +- .../unit_test/TestTemplateMetaFunctions.hpp | 2 +- lib/kokkos/core/unit_test/TestTile.hpp | 14 +- lib/kokkos/core/unit_test/TestUniqueToken.hpp | 2 +- lib/kokkos/core/unit_test/TestUtilities.hpp | 2 +- lib/kokkos/core/unit_test/TestViewAPI.hpp | 430 ++- .../unit_test/TestViewCtorPropEmbeddedDim.hpp | 2 +- .../core/unit_test/TestViewMapping_a.hpp | 132 +- .../core/unit_test/TestViewMapping_b.hpp | 2 +- .../unit_test/TestViewMapping_subview.hpp | 62 +- lib/kokkos/core/unit_test/TestViewOfClass.hpp | 6 +- .../core/unit_test/TestViewSpaceAssign.hpp | 2 +- lib/kokkos/core/unit_test/TestViewSubview.hpp | 218 +- lib/kokkos/core/unit_test/TestWorkGraph.hpp | 2 +- lib/kokkos/core/unit_test/UnitTestMain.cpp | 2 +- .../core/unit_test/UnitTestMainInit.cpp | 2 +- .../unit_test/UnitTest_PushFinalizeHook.cpp | 139 + .../UnitTest_PushFinalizeHook_terminate.cpp | 86 + 
.../cuda/TestCudaHostPinned_Category.hpp | 2 +- .../cuda/TestCudaHostPinned_SharedAlloc.cpp | 2 +- .../cuda/TestCudaHostPinned_ViewAPI.cpp | 2 +- .../cuda/TestCudaHostPinned_ViewMapping_a.cpp | 2 +- .../cuda/TestCudaHostPinned_ViewMapping_b.cpp | 2 +- ...TestCudaHostPinned_ViewMapping_subview.cpp | 2 +- .../unit_test/cuda/TestCudaUVM_Category.hpp | 2 +- .../cuda/TestCudaUVM_SharedAlloc.cpp | 2 +- .../unit_test/cuda/TestCudaUVM_ViewAPI.cpp | 2 +- .../cuda/TestCudaUVM_ViewMapping_a.cpp | 2 +- .../cuda/TestCudaUVM_ViewMapping_b.cpp | 2 +- .../cuda/TestCudaUVM_ViewMapping_subview.cpp | 2 +- .../cuda/TestCuda_AtomicOperations.cpp | 2 +- .../unit_test/cuda/TestCuda_AtomicViews.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Atomics.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Category.hpp | 2 +- .../core/unit_test/cuda/TestCuda_Complex.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Crs.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Init.cpp | 2 +- .../core/unit_test/cuda/TestCuda_InterOp.cpp | 85 + .../core/unit_test/cuda/TestCuda_MDRange.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Other.cpp | 2 +- .../unit_test/cuda/TestCuda_RangePolicy.cpp | 2 +- .../unit_test/cuda/TestCuda_Reductions.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Scan.cpp | 2 +- .../unit_test/cuda/TestCuda_SharedAlloc.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Spaces.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_a.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_b.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c01.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c02.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c03.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c04.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c05.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c06.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c07.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c08.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c09.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c10.cpp | 2 +- 
.../unit_test/cuda/TestCuda_SubView_c11.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c12.cpp | 2 +- .../unit_test/cuda/TestCuda_SubView_c13.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Task.cpp | 2 +- .../core/unit_test/cuda/TestCuda_Team.cpp | 2 +- .../cuda/TestCuda_TeamReductionScan.cpp | 2 +- .../unit_test/cuda/TestCuda_TeamScratch.cpp | 2 +- .../unit_test/cuda/TestCuda_UniqueToken.cpp | 2 +- .../unit_test/cuda/TestCuda_ViewAPI_b.cpp | 2 +- .../unit_test/cuda/TestCuda_ViewMapping_a.cpp | 2 +- .../unit_test/cuda/TestCuda_ViewMapping_b.cpp | 2 +- .../cuda/TestCuda_ViewMapping_subview.cpp | 2 +- .../unit_test/cuda/TestCuda_ViewOfClass.cpp | 2 +- .../unit_test/cuda/TestCuda_WorkGraph.cpp | 2 +- .../default/TestDefaultDeviceType.cpp | 2 +- .../default/TestDefaultDeviceTypeResize.cpp | 2 +- .../TestDefaultDeviceType_Category.hpp | 2 +- .../default/TestDefaultDeviceType_a.cpp | 2 +- .../default/TestDefaultDeviceType_b.cpp | 2 +- .../default/TestDefaultDeviceType_c.cpp | 2 +- .../default/TestDefaultDeviceType_d.cpp | 2 +- .../core/unit_test/openmp/TestOpenMP.hpp | 2 +- .../openmp/TestOpenMP_AtomicOperations.cpp | 2 +- .../openmp/TestOpenMP_AtomicViews.cpp | 2 +- .../unit_test/openmp/TestOpenMP_Atomics.cpp | 2 +- .../unit_test/openmp/TestOpenMP_Category.hpp | 2 +- .../unit_test/openmp/TestOpenMP_Complex.cpp | 2 +- .../core/unit_test/openmp/TestOpenMP_Crs.cpp | 2 +- .../core/unit_test/openmp/TestOpenMP_Init.cpp | 2 +- .../unit_test/openmp/TestOpenMP_InterOp.cpp | 90 + .../unit_test/openmp/TestOpenMP_MDRange.cpp | 2 +- .../unit_test/openmp/TestOpenMP_Other.cpp | 2 +- .../openmp/TestOpenMP_RangePolicy.cpp | 2 +- .../openmp/TestOpenMP_Reductions.cpp | 2 +- .../core/unit_test/openmp/TestOpenMP_Scan.cpp | 2 +- .../openmp/TestOpenMP_SharedAlloc.cpp | 2 +- .../unit_test/openmp/TestOpenMP_SubView_a.cpp | 2 +- .../unit_test/openmp/TestOpenMP_SubView_b.cpp | 2 +- .../openmp/TestOpenMP_SubView_c01.cpp | 2 +- .../openmp/TestOpenMP_SubView_c02.cpp | 2 +- 
.../openmp/TestOpenMP_SubView_c03.cpp | 2 +- .../openmp/TestOpenMP_SubView_c04.cpp | 2 +- .../openmp/TestOpenMP_SubView_c05.cpp | 2 +- .../openmp/TestOpenMP_SubView_c06.cpp | 2 +- .../openmp/TestOpenMP_SubView_c07.cpp | 2 +- .../openmp/TestOpenMP_SubView_c08.cpp | 2 +- .../openmp/TestOpenMP_SubView_c09.cpp | 2 +- .../openmp/TestOpenMP_SubView_c10.cpp | 2 +- .../openmp/TestOpenMP_SubView_c11.cpp | 2 +- .../openmp/TestOpenMP_SubView_c12.cpp | 2 +- .../openmp/TestOpenMP_SubView_c13.cpp | 2 +- .../core/unit_test/openmp/TestOpenMP_Task.cpp | 2 +- .../core/unit_test/openmp/TestOpenMP_Team.cpp | 2 +- .../openmp/TestOpenMP_TeamReductionScan.cpp | 2 +- .../openmp/TestOpenMP_TeamScratch.cpp | 2 +- .../openmp/TestOpenMP_UniqueToken.cpp | 2 +- .../unit_test/openmp/TestOpenMP_ViewAPI_b.cpp | 2 +- .../openmp/TestOpenMP_ViewMapping_a.cpp | 2 +- .../openmp/TestOpenMP_ViewMapping_b.cpp | 2 +- .../openmp/TestOpenMP_ViewMapping_subview.cpp | 2 +- .../openmp/TestOpenMP_ViewOfClass.cpp | 2 +- .../unit_test/openmp/TestOpenMP_WorkGraph.cpp | 2 +- .../openmptarget/TestOpenMPTarget.hpp | 2 +- .../TestOpenMPTarget_AtomicOperations.cpp | 2 +- .../TestOpenMPTarget_AtomicViews.cpp | 2 +- .../openmptarget/TestOpenMPTarget_Atomics.cpp | 2 +- .../TestOpenMPTarget_Category.hpp | 2 +- .../openmptarget/TestOpenMPTarget_Complex.cpp | 2 +- .../openmptarget/TestOpenMPTarget_Init.cpp | 2 +- .../openmptarget/TestOpenMPTarget_MDRange.cpp | 2 +- .../openmptarget/TestOpenMPTarget_Other.cpp | 2 +- .../TestOpenMPTarget_RangePolicy.cpp | 2 +- .../TestOpenMPTarget_Reductions.cpp | 2 +- .../openmptarget/TestOpenMPTarget_Scan.cpp | 2 +- .../TestOpenMPTarget_SharedAlloc.cpp | 2 +- .../TestOpenMPTarget_SubView_a.cpp | 2 +- .../TestOpenMPTarget_SubView_b.cpp | 2 +- .../TestOpenMPTarget_SubView_c01.cpp | 2 +- .../TestOpenMPTarget_SubView_c02.cpp | 2 +- .../TestOpenMPTarget_SubView_c03.cpp | 2 +- .../TestOpenMPTarget_SubView_c04.cpp | 2 +- .../TestOpenMPTarget_SubView_c05.cpp | 2 +- 
.../TestOpenMPTarget_SubView_c06.cpp | 2 +- .../TestOpenMPTarget_SubView_c07.cpp | 2 +- .../TestOpenMPTarget_SubView_c08.cpp | 2 +- .../TestOpenMPTarget_SubView_c09.cpp | 2 +- .../TestOpenMPTarget_SubView_c10.cpp | 2 +- .../TestOpenMPTarget_SubView_c11.cpp | 2 +- .../TestOpenMPTarget_SubView_c12.cpp | 2 +- .../openmptarget/TestOpenMPTarget_Team.cpp | 2 +- .../TestOpenMPTarget_TeamReductionScan.cpp | 2 +- .../TestOpenMPTarget_TeamScratch.cpp | 2 +- .../TestOpenMPTarget_ViewAPI_b.cpp | 2 +- .../TestOpenMPTarget_ViewMapping_a.cpp | 2 +- .../TestOpenMPTarget_ViewMapping_b.cpp | 2 +- .../TestOpenMPTarget_ViewMapping_subview.cpp | 2 +- .../TestOpenMPTarget_ViewOfClass.cpp | 2 +- .../core/unit_test/qthreads/TestQthreads.hpp | 2 +- .../qthreads/TestQthreads_Atomics.cpp | 2 +- .../qthreads/TestQthreads_Category.hpp | 2 +- .../unit_test/qthreads/TestQthreads_Other.cpp | 2 +- .../qthreads/TestQthreads_Reductions.cpp | 2 +- .../qthreads/TestQthreads_SubView_a.cpp | 2 +- .../qthreads/TestQthreads_SubView_b.cpp | 2 +- .../qthreads/TestQthreads_SubView_c01.cpp | 2 +- .../qthreads/TestQthreads_SubView_c02.cpp | 2 +- .../qthreads/TestQthreads_SubView_c03.cpp | 2 +- .../qthreads/TestQthreads_SubView_c04.cpp | 2 +- .../qthreads/TestQthreads_SubView_c05.cpp | 2 +- .../qthreads/TestQthreads_SubView_c06.cpp | 2 +- .../qthreads/TestQthreads_SubView_c07.cpp | 2 +- .../qthreads/TestQthreads_SubView_c08.cpp | 2 +- .../qthreads/TestQthreads_SubView_c09.cpp | 2 +- .../qthreads/TestQthreads_SubView_c10.cpp | 2 +- .../qthreads/TestQthreads_SubView_c11.cpp | 2 +- .../qthreads/TestQthreads_SubView_c12.cpp | 2 +- .../qthreads/TestQthreads_SubView_c13.cpp | 2 +- .../unit_test/qthreads/TestQthreads_Team.cpp | 2 +- .../qthreads/TestQthreads_ViewAPI_a.cpp | 2 +- .../qthreads/TestQthreads_ViewAPI_b.cpp | 2 +- .../rocm/TestROCmHostPinned_Category.hpp | 2 +- .../rocm/TestROCmHostPinned_SharedAlloc.cpp | 2 +- .../rocm/TestROCmHostPinned_ViewAPI.cpp | 2 +- .../rocm/TestROCmHostPinned_ViewMapping_a.cpp | 2 
+- .../rocm/TestROCmHostPinned_ViewMapping_b.cpp | 2 +- ...TestROCmHostPinned_ViewMapping_subview.cpp | 2 +- .../rocm/TestROCm_AtomicOperations.cpp | 2 +- .../unit_test/rocm/TestROCm_AtomicViews.cpp | 2 +- .../core/unit_test/rocm/TestROCm_Atomics.cpp | 2 +- .../core/unit_test/rocm/TestROCm_Category.hpp | 2 +- .../core/unit_test/rocm/TestROCm_Complex.cpp | 2 +- .../core/unit_test/rocm/TestROCm_Init.cpp | 2 +- .../core/unit_test/rocm/TestROCm_MDRange.cpp | 47 + .../core/unit_test/rocm/TestROCm_Other.cpp | 2 +- .../unit_test/rocm/TestROCm_RangePolicy.cpp | 2 +- .../unit_test/rocm/TestROCm_Reductions.cpp | 2 +- .../core/unit_test/rocm/TestROCm_Scan.cpp | 2 +- .../unit_test/rocm/TestROCm_SharedAlloc.cpp | 2 +- .../core/unit_test/rocm/TestROCm_Spaces.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_a.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_b.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c01.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c02.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c03.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c04.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c05.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c06.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c07.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c08.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c09.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c10.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c11.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_c12.cpp | 2 +- .../core/unit_test/rocm/TestROCm_Team.cpp | 2 +- .../rocm/TestROCm_TeamReductionScan.cpp | 2 +- .../unit_test/rocm/TestROCm_TeamScratch.cpp | 2 +- .../unit_test/rocm/TestROCm_ViewAPI_b.cpp | 2 +- .../unit_test/rocm/TestROCm_ViewMapping_a.cpp | 2 +- .../unit_test/rocm/TestROCm_ViewMapping_b.cpp | 2 +- .../rocm/TestROCm_ViewMapping_subview.cpp | 2 +- .../unit_test/rocm/TestROCm_ViewOfClass.cpp | 2 +- .../serial/TestSerial_AtomicOperations.cpp | 2 +- .../serial/TestSerial_AtomicViews.cpp | 2 +- 
.../unit_test/serial/TestSerial_Atomics.cpp | 2 +- .../unit_test/serial/TestSerial_Category.hpp | 2 +- .../unit_test/serial/TestSerial_Complex.cpp | 2 +- .../core/unit_test/serial/TestSerial_Crs.cpp | 2 +- .../core/unit_test/serial/TestSerial_Init.cpp | 2 +- .../unit_test/serial/TestSerial_MDRange.cpp | 2 +- .../unit_test/serial/TestSerial_Other.cpp | 2 +- .../serial/TestSerial_RangePolicy.cpp | 2 +- .../serial/TestSerial_Reductions.cpp | 2 +- .../core/unit_test/serial/TestSerial_Scan.cpp | 2 +- .../serial/TestSerial_SharedAlloc.cpp | 2 +- .../unit_test/serial/TestSerial_SubView_a.cpp | 2 +- .../unit_test/serial/TestSerial_SubView_b.cpp | 2 +- .../serial/TestSerial_SubView_c01.cpp | 2 +- .../serial/TestSerial_SubView_c02.cpp | 2 +- .../serial/TestSerial_SubView_c03.cpp | 2 +- .../serial/TestSerial_SubView_c04.cpp | 2 +- .../serial/TestSerial_SubView_c05.cpp | 2 +- .../serial/TestSerial_SubView_c06.cpp | 2 +- .../serial/TestSerial_SubView_c07.cpp | 2 +- .../serial/TestSerial_SubView_c08.cpp | 2 +- .../serial/TestSerial_SubView_c09.cpp | 2 +- .../serial/TestSerial_SubView_c10.cpp | 2 +- .../serial/TestSerial_SubView_c11.cpp | 2 +- .../serial/TestSerial_SubView_c12.cpp | 2 +- .../serial/TestSerial_SubView_c13.cpp | 2 +- .../core/unit_test/serial/TestSerial_Task.cpp | 2 +- .../core/unit_test/serial/TestSerial_Team.cpp | 2 +- .../serial/TestSerial_TeamReductionScan.cpp | 2 +- .../serial/TestSerial_TeamScratch.cpp | 2 +- .../unit_test/serial/TestSerial_ViewAPI_b.cpp | 2 +- .../serial/TestSerial_ViewMapping_a.cpp | 2 +- .../serial/TestSerial_ViewMapping_b.cpp | 2 +- .../serial/TestSerial_ViewMapping_subview.cpp | 2 +- .../serial/TestSerial_ViewOfClass.cpp | 2 +- .../unit_test/serial/TestSerial_WorkGraph.cpp | 2 +- .../core/unit_test/threads/TestThreads.hpp | 2 +- .../threads/TestThreads_AtomicOperations.cpp | 2 +- .../threads/TestThreads_AtomicViews.cpp | 2 +- .../unit_test/threads/TestThreads_Atomics.cpp | 2 +- .../threads/TestThreads_Category.hpp | 2 +- 
.../unit_test/threads/TestThreads_Complex.cpp | 2 +- .../unit_test/threads/TestThreads_Crs.cpp | 2 +- .../unit_test/threads/TestThreads_Init.cpp | 2 +- .../unit_test/threads/TestThreads_MDRange.cpp | 2 +- .../unit_test/threads/TestThreads_Other.cpp | 2 +- .../threads/TestThreads_RangePolicy.cpp | 2 +- .../threads/TestThreads_Reductions.cpp | 2 +- .../unit_test/threads/TestThreads_Scan.cpp | 2 +- .../threads/TestThreads_SharedAlloc.cpp | 2 +- .../threads/TestThreads_SubView_a.cpp | 2 +- .../threads/TestThreads_SubView_b.cpp | 2 +- .../threads/TestThreads_SubView_c01.cpp | 2 +- .../threads/TestThreads_SubView_c02.cpp | 2 +- .../threads/TestThreads_SubView_c03.cpp | 2 +- .../threads/TestThreads_SubView_c04.cpp | 2 +- .../threads/TestThreads_SubView_c05.cpp | 2 +- .../threads/TestThreads_SubView_c06.cpp | 2 +- .../threads/TestThreads_SubView_c07.cpp | 2 +- .../threads/TestThreads_SubView_c08.cpp | 2 +- .../threads/TestThreads_SubView_c09.cpp | 2 +- .../threads/TestThreads_SubView_c10.cpp | 2 +- .../threads/TestThreads_SubView_c11.cpp | 2 +- .../threads/TestThreads_SubView_c12.cpp | 2 +- .../threads/TestThreads_SubView_c13.cpp | 2 +- .../unit_test/threads/TestThreads_Team.cpp | 2 +- .../threads/TestThreads_TeamReductionScan.cpp | 2 +- .../threads/TestThreads_TeamScratch.cpp | 2 +- .../threads/TestThreads_ViewAPI_b.cpp | 2 +- .../threads/TestThreads_ViewMapping_a.cpp | 2 +- .../threads/TestThreads_ViewMapping_b.cpp | 2 +- .../TestThreads_ViewMapping_subview.cpp | 2 +- .../threads/TestThreads_ViewOfClass.cpp | 2 +- .../threads/TestThreads_WorkGraph.cpp | 2 +- .../query_cuda_arch.cpp | 0 .../{config => doc}/kokkos-promotion.txt | 16 +- .../example/cmake_build/cmake_example.cpp | 2 +- lib/kokkos/example/common/VectorImport.hpp | 2 +- lib/kokkos/example/common/WrapMPI.hpp | 2 +- lib/kokkos/example/feint/ElemFunctor.hpp | 14 +- lib/kokkos/example/feint/feint.hpp | 2 +- lib/kokkos/example/feint/feint_cuda.cpp | 2 +- lib/kokkos/example/feint/feint_fwd.hpp | 2 +- 
lib/kokkos/example/feint/feint_openmp.cpp | 2 +- lib/kokkos/example/feint/feint_rocm.cpp | 2 +- lib/kokkos/example/feint/feint_threads.cpp | 2 +- lib/kokkos/example/feint/main.cpp | 2 +- lib/kokkos/example/fenl/CGSolve.hpp | 4 +- lib/kokkos/example/fenl/fenl.cpp | 2 +- lib/kokkos/example/fenl/fenl.hpp | 2 +- lib/kokkos/example/fenl/fenl_functors.hpp | 60 +- lib/kokkos/example/fenl/fenl_impl.hpp | 8 +- lib/kokkos/example/fenl/main.cpp | 2 +- lib/kokkos/example/fixture/BoxElemFixture.hpp | 26 +- lib/kokkos/example/fixture/BoxElemPart.cpp | 2 +- lib/kokkos/example/fixture/BoxElemPart.hpp | 2 +- lib/kokkos/example/fixture/HexElement.hpp | 2 +- lib/kokkos/example/fixture/Main.cpp | 2 +- lib/kokkos/example/fixture/TestFixture.cpp | 2 +- lib/kokkos/example/fixture/TestFixture.hpp | 4 +- lib/kokkos/example/global_2_local_ids/G2L.hpp | 2 +- .../example/global_2_local_ids/G2L_Main.cpp | 2 +- lib/kokkos/example/grow_array/grow_array.hpp | 2 +- lib/kokkos/example/grow_array/main.cpp | 2 +- lib/kokkos/example/md_skeleton/force.cpp | 2 +- lib/kokkos/example/md_skeleton/main.cpp | 2 +- lib/kokkos/example/md_skeleton/neighbor.cpp | 2 +- lib/kokkos/example/md_skeleton/setup.cpp | 2 +- lib/kokkos/example/md_skeleton/system.h | 2 +- lib/kokkos/example/md_skeleton/types.h | 2 +- .../example/multi_fem/BoxMeshFixture.hpp | 2 +- .../example/multi_fem/BoxMeshPartition.cpp | 2 +- .../example/multi_fem/BoxMeshPartition.hpp | 2 +- lib/kokkos/example/multi_fem/Explicit.hpp | 2 +- .../example/multi_fem/ExplicitFunctors.hpp | 2 +- lib/kokkos/example/multi_fem/FEMesh.hpp | 2 +- lib/kokkos/example/multi_fem/HexElement.hpp | 2 +- .../multi_fem/HexExplicitFunctions.hpp | 2 +- lib/kokkos/example/multi_fem/Implicit.hpp | 2 +- .../example/multi_fem/ImplicitFunctors.hpp | 2 +- lib/kokkos/example/multi_fem/LinAlgBLAS.hpp | 2 +- lib/kokkos/example/multi_fem/Nonlinear.hpp | 2 +- .../multi_fem/NonlinearElement_Cuda.hpp | 2 +- .../example/multi_fem/NonlinearFunctors.hpp | 2 +- 
lib/kokkos/example/multi_fem/ParallelComm.hpp | 2 +- .../example/multi_fem/ParallelDataMap.hpp | 2 +- .../example/multi_fem/ParallelMachine.cpp | 2 +- .../example/multi_fem/ParallelMachine.hpp | 2 +- .../example/multi_fem/SparseLinearSystem.hpp | 2 +- .../multi_fem/SparseLinearSystemFill.hpp | 2 +- .../multi_fem/SparseLinearSystem_Cuda.hpp | 2 +- .../example/multi_fem/TestBoxMeshFixture.hpp | 2 +- .../multi_fem/TestBoxMeshPartition.cpp | 2 +- lib/kokkos/example/multi_fem/TestCuda.cpp | 2 +- lib/kokkos/example/multi_fem/TestHost.cpp | 2 +- .../example/multi_fem/TestHybridFEM.cpp | 2 +- .../example/query_device/query_device.cpp | 2 +- lib/kokkos/example/sort_array/main.cpp | 2 +- lib/kokkos/example/sort_array/sort_array.hpp | 2 +- .../tutorial/01_hello_world/hello_world.cpp | 2 +- .../hello_world_lambda.cpp | 2 +- .../02_simple_reduce/simple_reduce.cpp | 2 +- .../simple_reduce_lambda.cpp | 2 +- .../tutorial/03_simple_view/simple_view.cpp | 2 +- .../simple_view_lambda.cpp | 2 +- .../simple_memoryspaces.cpp | 2 +- .../05_simple_atomics/simple_atomics.cpp | 6 +- .../simple_mdrangepolicy.cpp | 2 +- .../01_data_layouts/data_layouts.cpp | 8 +- .../02_memory_traits/memory_traits.cpp | 8 +- .../Advanced_Views/03_subviews/subviews.cpp | 28 +- .../Advanced_Views/04_dualviews/dual_view.cpp | 8 +- .../05_NVIDIA_UVM/uvm_example.cpp | 8 +- .../overlapping_deepcopy.cpp | 2 +- .../01_random_numbers/random_numbers.cpp | 2 +- .../01_thread_teams/thread_teams.cpp | 2 +- .../thread_teams_lambda.cpp | 2 +- .../nested_parallel_for.cpp | 2 +- .../03_vectorization/vectorization.cpp | 12 +- .../04_team_scan/team_scan.cpp | 2 +- .../launch_bounds/launch_bounds_reduce.cpp | 2 +- lib/kokkos/generate_makefile.bash | 51 +- lib/kokkos/{config => }/master_history.txt | 3 +- lib/kokkos/{config => scripts}/snapshot.py | 0 .../testing_scripts/README | 0 .../testing_scripts/jenkins_test_driver | 0 .../testing_scripts/obj_size_opt_check | 0 .../test_kokkos_master_develop_promotion.sh | 0 
.../trilinos-integration/checkin-test | 0 .../prepare_trilinos_repos.sh | 2 +- .../shepard_jenkins_run_script_pthread_intel | 2 +- .../shepard_jenkins_run_script_serial_intel | 2 +- .../white_run_jenkins_script_cuda | 2 +- .../white_run_jenkins_script_omp | 2 +- 694 files changed, 12261 insertions(+), 6745 deletions(-) delete mode 100644 lib/kokkos/config/configure_compton_cpu.sh delete mode 100644 lib/kokkos/config/configure_compton_mic.sh delete mode 100644 lib/kokkos/config/configure_kokkos.sh delete mode 100755 lib/kokkos/config/configure_kokkos_bgq.sh delete mode 100755 lib/kokkos/config/configure_kokkos_dev.sh delete mode 100644 lib/kokkos/config/configure_kokkos_nvidia.sh delete mode 100644 lib/kokkos/config/configure_shannon.sh delete mode 100755 lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh delete mode 100644 lib/kokkos/config/kokkos-trilinos-integration-procedure.txt delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-all.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-cuda.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-omp.sh delete mode 100755 lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh delete mode 100755 lib/kokkos/config/nvcc_wrapper create mode 100644 lib/kokkos/core/perf_test/PerfTest_ViewCopy.cpp create mode 100644 lib/kokkos/core/src/Kokkos_AnonymousSpace.hpp create mode 100644 lib/kokkos/core/src/Kokkos_CopyViews.hpp create mode 100644 lib/kokkos/core/src/ROCm/KokkosExp_ROCm_IterateTile_Refactor.hpp create mode 100644 lib/kokkos/core/unit_test/UnitTest_PushFinalizeHook.cpp create mode 100644 
lib/kokkos/core/unit_test/UnitTest_PushFinalizeHook_terminate.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_InterOp.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_InterOp.cpp create mode 100644 lib/kokkos/core/unit_test/rocm/TestROCm_MDRange.cpp rename lib/kokkos/{config => doc/hardware_identification}/query_cuda_arch.cpp (100%) rename lib/kokkos/{config => doc}/kokkos-promotion.txt (86%) rename lib/kokkos/{config => }/master_history.txt (87%) rename lib/kokkos/{config => scripts}/snapshot.py (100%) rename lib/kokkos/{config => scripts}/testing_scripts/README (100%) rename lib/kokkos/{config => scripts}/testing_scripts/jenkins_test_driver (100%) rename lib/kokkos/{config => scripts}/testing_scripts/obj_size_opt_check (100%) rename lib/kokkos/{config => scripts/testing_scripts}/test_kokkos_master_develop_promotion.sh (100%) rename lib/kokkos/{config => scripts}/trilinos-integration/checkin-test (100%) rename lib/kokkos/{config => scripts}/trilinos-integration/prepare_trilinos_repos.sh (93%) rename lib/kokkos/{config => scripts}/trilinos-integration/shepard_jenkins_run_script_pthread_intel (91%) rename lib/kokkos/{config => scripts}/trilinos-integration/shepard_jenkins_run_script_serial_intel (91%) rename lib/kokkos/{config => scripts}/trilinos-integration/white_run_jenkins_script_cuda (91%) rename lib/kokkos/{config => scripts}/trilinos-integration/white_run_jenkins_script_omp (91%) diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index e3de6b048d..feb2bd547f 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,49 @@ # Change Log +## [2.6.00](https://github.com/kokkos/kokkos/tree/2.6.00) (2018-03-07) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.5.00...2.6.00) + +**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6** + +**Implemented enhancements:** + +- Support NVIDIA Volta microarchitecture [\#1466](https://github.com/kokkos/kokkos/issues/1466) +- 
Kokkos - Define empty functions when profiling disabled [\#1424](https://github.com/kokkos/kokkos/issues/1424) +- Don't use \_\_constant\_\_ cache for lock arrays, enable once per run update instead of once per call [\#1385](https://github.com/kokkos/kokkos/issues/1385) +- task dag enhancement. [\#1354](https://github.com/kokkos/kokkos/issues/1354) +- Cuda task team collectives and stack size [\#1353](https://github.com/kokkos/kokkos/issues/1353) +- Replace View operator acceptance of more than rank integers with 'access' function [\#1333](https://github.com/kokkos/kokkos/issues/1333) +- Interoperability: Do not shut down backend execution space runtimes upon calling finalize. [\#1305](https://github.com/kokkos/kokkos/issues/1305) +- shmem\_size for LayoutStride [\#1291](https://github.com/kokkos/kokkos/issues/1291) +- Kokkos::resize performs poorly on 1D Views [\#1270](https://github.com/kokkos/kokkos/issues/1270) +- stride\(\) is inconsistent with dimension\(\), extent\(\), etc. [\#1214](https://github.com/kokkos/kokkos/issues/1214) +- Kokkos::sort defaults to std::sort on host [\#1208](https://github.com/kokkos/kokkos/issues/1208) +- DynamicView with host size grow [\#1206](https://github.com/kokkos/kokkos/issues/1206) +- Unmanaged View with Anonymous Memory Space [\#1175](https://github.com/kokkos/kokkos/issues/1175) +- Sort subset of Kokkos::DynamicView [\#1160](https://github.com/kokkos/kokkos/issues/1160) +- MDRange policy doesn't support lambda reductions [\#1054](https://github.com/kokkos/kokkos/issues/1054) +- Add ability to set hook on Kokkos::finalize [\#714](https://github.com/kokkos/kokkos/issues/714) +- Atomics with Serial Backend - Default should be Disable? 
[\#549](https://github.com/kokkos/kokkos/issues/549) +- KOKKOS\_ENABLE\_DEPRECATED\_CODE [\#1359](https://github.com/kokkos/kokkos/issues/1359) + +**Fixed bugs:** + +- cuda\_internal\_maximum\_warp\_count returns 8, but I believe it should return 16 for P100 [\#1269](https://github.com/kokkos/kokkos/issues/1269) +- Cuda: level 1 scratch memory bug \(reported by Stan Moore\) [\#1434](https://github.com/kokkos/kokkos/issues/1434) +- MDRangePolicy Reduction requires value\_type typedef in Functor [\#1379](https://github.com/kokkos/kokkos/issues/1379) +- Kokkos DeepCopy between empty views fails [\#1369](https://github.com/kokkos/kokkos/issues/1369) +- Several issues with new CMake build infrastructure \(reported by Eric Phipps\) [\#1365](https://github.com/kokkos/kokkos/issues/1365) +- deep\_copy between rank-1 host/device views of differing layouts without UVM no longer works \(reported by Eric Phipps\) [\#1363](https://github.com/kokkos/kokkos/issues/1363) +- Profiling can't be disabled in CMake, and a parallel\_for is missing for tasks \(reported by Kyungjoo Kim\) [\#1349](https://github.com/kokkos/kokkos/issues/1349) +- get\_work\_partition int overflow \(reported by berryj5\) [\#1327](https://github.com/kokkos/kokkos/issues/1327) +- Kokkos::deep\_copy must fence even if the two views are the same [\#1303](https://github.com/kokkos/kokkos/issues/1303) +- CudaUVMSpace::allocate/deallocate must fence [\#1302](https://github.com/kokkos/kokkos/issues/1302) +- ViewResize on CUDA fails in Debug because of too many resources requested [\#1299](https://github.com/kokkos/kokkos/issues/1299) +- Cuda 9 and intrepid2 calls from Panzer. 
[\#1183](https://github.com/kokkos/kokkos/issues/1183) +- Slowdown due to tracking\_enabled\(\) in 2.04.00 \(found by Albany app\) [\#1016](https://github.com/kokkos/kokkos/issues/1016) +- Bounds checking fails with zero-span Views \(reported by Stan Moore\) [\#1411](https://github.com/kokkos/kokkos/issues/1411) + + ## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 9c708ded4a..cd1f4ea981 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -7,7 +7,7 @@ ELSE() ENDIF() IF(NOT KOKKOS_HAS_TRILINOS) - cmake_minimum_required(VERSION 3.1 FATAL_ERROR) + cmake_minimum_required(VERSION 3.3 FATAL_ERROR) # Define Project Name if this is a standalone build IF(NOT DEFINED ${PROJECT_NAME}) @@ -37,9 +37,19 @@ IF(NOT KOKKOS_HAS_TRILINOS) COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out - RESULT_VARIABLE res + RESULT_VARIABLE GEN_SETTINGS_RESULT ) + if (GEN_SETTINGS_RESULT) + message(FATAL_ERROR "Kokkos settings generation failed:\n" + "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings") + endif() include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) + string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") + string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") + string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}") + list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "") + list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "") + list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_NAMES "") set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) #------------ NOW BUILD 
------------------------------------------------------ diff --git a/lib/kokkos/Copyright.txt b/lib/kokkos/Copyright.txt index 05980758fa..50b76995af 100644 --- a/lib/kokkos/Copyright.txt +++ b/lib/kokkos/Copyright.txt @@ -34,7 +34,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/HOW_TO_SNAPSHOT b/lib/kokkos/HOW_TO_SNAPSHOT index 46bfb4167f..ad3f78efb4 100644 --- a/lib/kokkos/HOW_TO_SNAPSHOT +++ b/lib/kokkos/HOW_TO_SNAPSHOT @@ -19,7 +19,7 @@ snapshot Kokkos from github.com/kokkos to Trilinos. 3) Snapshot the current commit in the Kokkos clone into the Trilinos clone. This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}: - ${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages + ${KOKKOS}/scripts/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages 4) Verify the snapshot commit happened as expected cd ${TRILINOS}/packages/kokkos diff --git a/lib/kokkos/LICENSE b/lib/kokkos/LICENSE index 7341a699d6..c68a8a2a9f 100644 --- a/lib/kokkos/LICENSE +++ b/lib/kokkos/LICENSE @@ -36,7 +36,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 4315b009d5..a7bb63f190 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -9,8 +9,8 @@ KOKKOS_DEVICES ?= "OpenMP" #KOKKOS_DEVICES ?= "Pthreads" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKX -# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61 -# ARM: ARMv80,ARMv81,ARMv8-ThunderX +# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72 +# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2 # IBM: BGQ,Power7,Power8,Power9 # AMD-GPUS: Kaveri,Carrizo,Fiji,Vega # AMD-CPUS: AMDAVX,Ryzen,Epyc @@ -21,7 +21,7 @@ KOKKOS_DEBUG ?= "no" KOKKOS_USE_TPLS ?= "" # Options: c++11,c++1z KOKKOS_CXX_STANDARD ?= "c++11" -# Options: aggressive_vectorization,disable_profiling +# Options: aggressive_vectorization,disable_profiling,disable_deprecated_code KOKKOS_OPTIONS ?= "" # Default settings specific options. 
@@ -48,6 +48,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings) KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization) KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling) +KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg) @@ -93,7 +94,7 @@ KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VE KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI) KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) -KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)) +KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) @@ -229,12 +230,16 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ma KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call 
kokkos_has_string,$(KOKKOS_ARCH),Maxwell53) KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61) KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60) +KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70) +KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72) KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) @@ -249,6 +254,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) @@ -267,7 +274,8 @@ endif KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80) KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX) -KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc)) +KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2) +KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo 
$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2) | bc)) # IBM based. KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ) @@ -316,6 +324,9 @@ endif # Generating the list of Flags. KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src +KOKKOS_TPL_INCLUDE_DIRS = +KOKKOS_TPL_LIBRARY_DIRS = +KOKKOS_TPL_LIBRARY_NAMES = KOKKOS_CXXFLAGS = ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) @@ -323,7 +334,9 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) endif KOKKOS_LIBS = -ldl +KOKKOS_TPL_LIBRARY_NAMES += dl KOKKOS_LDFLAGS = -L$(shell pwd) +KOKKOS_LINK_FLAGS = KOKKOS_SRC = KOKKOS_HEADERS = @@ -437,21 +450,32 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) - KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include - KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + ifneq ($(HWLOC_PATH),) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib + endif KOKKOS_LIBS += -lhwloc + KOKKOS_TPL_LIBRARY_NAMES += hwloc tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HWLOC") endif ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT") KOKKOS_LIBS += -lrt + KOKKOS_TPL_LIBRARY_NAMES += rt endif ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include - KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + ifneq ($(MEMKIND_PATH),) + KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include + KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib + endif KOKKOS_LIBS += -lmemkind -lnuma + KOKKOS_TPL_LIBRARY_NAMES += memkind numa tmp := $(call kokkos_append_header,"\#define 
KOKKOS_HAVE_HBWSPACE") endif @@ -459,6 +483,10 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING") endif +ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") +endif + tmp := $(call kokkos_append_header,"/* Optimization Settings */") ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1) @@ -560,6 +588,24 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2") + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + KOKKOS_CXXFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99 + KOKKOS_LDFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99 + endif + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42") @@ -754,10 +800,11 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch - endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch - KOKKOS_CXXFLAGS += -x cuda + else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch + KOKKOS_CXXFLAGS += -x cuda + else + $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) @@ -805,6 +852,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) + tmp := $(call 
kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 + endif ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) @@ -850,6 +907,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags) KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm + KOKKOS_TPL_LIBRARY_NAMES += hc_am m KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp) @@ -880,13 +938,17 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 - KOKKOS_LIBS += -lcudart -lcuda - - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) + ifneq ($(CUDA_PATH),) + KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) + endif endif + KOKKOS_LIBS += -lcudart -lcuda + KOKKOS_TPL_LIBRARY_NAMES += cudart cuda endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) @@ -911,20 +973,27 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) endif KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) + KOKKOS_LINK_FLAGS += 
$(KOKKOS_INTERNAL_OPENMP_FLAG) endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) KOKKOS_LIBS += -lpthread + KOKKOS_TPL_LIBRARY_NAMES += pthread endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) - KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include - KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib + ifneq ($(QTHREADS_PATH),) + KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include + KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64 + endif KOKKOS_LIBS += -lqthread + KOKKOS_TPL_LIBRARY_NAMES += qthread endif # Explicitly set the GCC Toolchain for Clang. @@ -940,11 +1009,6 @@ ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) endif -# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning. -ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC)) -endif - # Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial # device to avoid a link warning. ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) diff --git a/lib/kokkos/README b/lib/kokkos/README index d6c66634dd..31d134bf0a 100644 --- a/lib/kokkos/README +++ b/lib/kokkos/README @@ -1,87 +1,101 @@ -Kokkos implements a programming model in C++ for writing performance portable +Kokkos Core implements a programming model in C++ for writing performance portable applications targeting all major HPC platforms. For that purpose it provides abstractions for both parallel execution of code and data management. 
Kokkos is designed to target complex node architectures with N-level memory hierarchies and multiple types of execution resources. It currently can use OpenMP, Pthreads and CUDA as backend programming models. -Kokkos is licensed under standard 3-clause BSD terms of use. For specifics -see the LICENSE file contained in the repository or distribution. +Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem, +which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as +profiling and debugging tools (https://github.com/kokkos/kokkos-tools). -The core developers of Kokkos are Carter Edwards and Christian Trott -at the Computer Science Research Institute of the Sandia National -Laboratories. +# Learning about Kokkos -The KokkosP interface and associated tools are developed by the Application -Performance Team and Kokkos core developers at Sandia National Laboratories. +A programming guide can be found on the Wiki, the API reference is under development. -To learn more about Kokkos consider watching one of our presentations: -GTC 2015: - http://on-demand.gputechconf.com/gtc/2015/video/S5166.html - http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf +For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. -A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version -and feedback is greatly appreciated. +For non-public questions send an email to +crtrott(at)sandia.gov A separate repository with extensive tutorial material can be found under https://github.com/kokkos/kokkos-tutorials. -If you have a patch to contribute please feel free to issue a pull request against -the develop branch. For major contributions it is better to contact us first -for guidance. +Furthermore, the 'example/tutorial' directory provides step by step tutorial +examples which explain many of the features of Kokkos. They work with +simple Makefiles. 
To build with g++ and OpenMP simply type 'make' +in the 'example/tutorial' directory. This will build all examples in the +subfolders. To change the build options refer to the Programming Guide +in the compilation section. -For questions please send an email to -kokkos-users@software.sandia.gov +To learn more about Kokkos consider watching one of our presentations: +* GTC 2015: + - http://on-demand.gputechconf.com/gtc/2015/video/S5166.html + - http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf -For non-public questions send an email to -hcedwar(at)sandia.gov and crtrott(at)sandia.gov -============================================================================ -====Requirements============================================================ -============================================================================ +# Contributing to Kokkos -Primary tested compilers on X86 are: - GCC 4.8.4 - GCC 4.9.3 - GCC 5.1.0 - GCC 5.3.0 - GCC 6.1.0 - Intel 15.0.2 - Intel 16.0.1 - Intel 17.1.043 - Intel 17.4.196 - Intel 18.0.128 - Clang 3.5.2 - Clang 3.6.1 - Clang 3.7.1 - Clang 3.8.1 - Clang 3.9.0 - Clang 4.0.0 - Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44) - PGI 17.10 - NVCC 7.0 for CUDA (with gcc 4.8.4) - NVCC 7.5 for CUDA (with gcc 4.8.4) - NVCC 8.0.44 for CUDA (with gcc 5.3.0) +We are open and try to encourage contributions from external developers. +To do so please first open an issue describing the contribution and then issue +a pull request against the develop branch. For larger features it may be good +to get guidance from the core development team first through the github issue. -Primary tested compilers on Power 8 are: - GCC 5.4.0 (OpenMP,Serial) - IBM XL 13.1.5 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug) - NVCC 8.0.44 for CUDA (with gcc 5.4.0) - NVCC 9.0.103 for CUDA (with gcc 6.3.0) +Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. 
+Which means contributing to Kokkos allows anyone else to use your contributions +not just for public purposes but also for closed source commercial projects. +For specifics see the LICENSE file contained in the repository or distribution. -Primary tested compilers on Intel KNL are: - GCC 6.2.0 - Intel 16.4.258 (with gcc 4.7.2) - Intel 17.2.174 (with gcc 4.9.3) - Intel 18.0.128 (with gcc 4.9.3) +# Requirements -Other compilers working: - X86: - Cygwin 2.1.0 64bit with gcc 4.9.3 +### Primary tested compilers on X86 are: + * GCC 4.8.4 + * GCC 4.9.3 + * GCC 5.1.0 + * GCC 5.3.0 + * GCC 6.1.0 + * Intel 15.0.2 + * Intel 16.0.1 + * Intel 17.1.043 + * Intel 17.4.196 + * Intel 18.0.128 + * Clang 3.6.1 + * Clang 3.7.1 + * Clang 3.8.1 + * Clang 3.9.0 + * Clang 4.0.0 + * Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44) + * Clang 6.0.0 for CUDA (CUDA Toolkit 9.1) + * PGI 17.10 + * NVCC 7.0 for CUDA (with gcc 4.8.4) + * NVCC 7.5 for CUDA (with gcc 4.8.4) + * NVCC 8.0.44 for CUDA (with gcc 5.3.0) + * NVCC 9.1 for CUDA (with gcc 6.1.0) -Known non-working combinations: - Power8: - Pthreads backend +### Primary tested compilers on Power 8 are: + * GCC 5.4.0 (OpenMP,Serial) + * IBM XL 13.1.6 (OpenMP, Serial) + * NVCC 8.0.44 for CUDA (with gcc 5.4.0) + * NVCC 9.0.103 for CUDA (with gcc 6.3.0 and XL 13.1.6) + +### Primary tested compilers on Intel KNL are: + * GCC 6.2.0 + * Intel 16.4.258 (with gcc 4.7.2) + * Intel 17.2.174 (with gcc 4.9.3) + * Intel 18.0.128 (with gcc 4.9.3) + +### Primary tested compilers on ARM + * GCC 6.1.0 + +### Other compilers working: + * X86: + - Cygwin 2.1.0 64bit with gcc 4.9.3 + +### Known non-working combinations: + * Power8: + - Pthreads backend + * ARM + - Pthreads backend Primary tested compiler are passing in release mode @@ -97,20 +111,7 @@ NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitiali Other compilers are tested occasionally, in particular when pushing from develop to master branch, without -Werror and only for a select 
set of backends. -============================================================================ -====Getting started========================================================= -============================================================================ - -In the 'example/tutorial' directory you will find step by step tutorial -examples which explain many of the features of Kokkos. They work with -simple Makefiles. To build with g++ and OpenMP simply type 'make' -in the 'example/tutorial' directory. This will build all examples in the -subfolders. To change the build options refer to the Programming Guide -in the compilation section. - -============================================================================ -====Running Unit Tests====================================================== -============================================================================ +# Running Unit Tests To run the unit tests create a build directory and run the following commands @@ -121,30 +122,35 @@ make test Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as changing the device type for which to build. -============================================================================ -====Install the library===================================================== -============================================================================ +# Installing the library To install Kokkos as a library create a build directory and run the following KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH -make lib +make kokkoslib make install KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as changing the device type for which to build. 
-============================================================================ -====CMakeFiles============================================================== -============================================================================ +Note that in many cases it is preferable to build Kokkos inline with an +application. The main reason is that you may otherwise need many different +configurations of Kokkos installed depending on the required compile time +features an application needs. For example there is only one default +execution space, which means you need different installations to have OpenMP +or Pthreads as the default space. Also for the CUDA backend there are certain +choices, such as allowing relocatable device code, which must be made at +installation time. Building Kokkos inline uses largely the same process +as compiling an application against an installed Kokkos library. See for +example benchmarks/bytes_and_flops/Makefile which can be used with an installed +library and for an inline build. -The CMake files contained in this repository require Tribits and are used -for integration with Trilinos. They do not currently support a standalone -CMake build. +### CMake -=========================================================================== -====Kokkos and CUDA UVM==================================================== -=========================================================================== +Kokkos supports being built as part of a CMake application. An example can +be found in example/cmake_build. + +# Kokkos and CUDA UVM Kokkos does support UVM as a specific memory space called CudaUVMSpace. Allocations made with that space are accessible from host and device. @@ -154,25 +160,16 @@ In either case UVM comes with a number of restrictions: running. This will lead to segfaults. To avoid that you either need to call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or you can set the environment variable CUDA_LAUNCH_BLOCKING=1. 
-Furthermore in multi socket multi GPU machines, UVM defaults to using -zero copy allocations for technical reasons related to using multiple +Furthermore in multi socket multi GPU machines without NVLINK, UVM defaults +to using zero copy allocations for technical reasons related to using multiple GPUs from the same process. If an executable doesn't do that (e.g. each MPI rank of an application uses a single GPU [can be the same GPU for multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. This will enforce proper UVM allocations, but can lead to errors if more than a single GPU is used by a single process. -=========================================================================== -====Contributing=========================================================== -=========================================================================== -Contributions to Kokkos are welcome. In order to do so, please open an issue -where a feature request or bug can be discussed. Then issue a pull request -with your contribution. Pull requests must be issued against the develop branch. - -=========================================================================== -====Citing Kokkos========================================================== -=========================================================================== +# Citing Kokkos If you publish work which mentions Kokkos, please cite the following paper: diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index 3db9a145d7..1c659e44a4 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -1530,7 +1530,7 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) + if(idx(a.extent(0))) a(idx) = Rand::draw(gen,range); } rand_pool.free_state(gen); @@ -1555,8 +1555,8 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) a(idx,k) = Rand::draw(gen,range); } } @@ -1583,9 +1583,9 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) a(idx,k,l) = Rand::draw(gen,range); } } @@ -1611,10 +1611,10 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) a(idx,k,l,m) = Rand::draw(gen,range); } } @@ -1640,11 +1640,11 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) - for(IndexType n=0;n(a.dimension_4());n++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + 
for(IndexType m=0;m(a.extent(3));m++) + for(IndexType n=0;n(a.extent(4));n++) a(idx,k,l,m,n) = Rand::draw(gen,range); } } @@ -1670,12 +1670,12 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) - for(IndexType n=0;n(a.dimension_4());n++) - for(IndexType o=0;o(a.dimension_5());o++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) + for(IndexType n=0;n(a.extent(4));n++) + for(IndexType o=0;o(a.extent(5));o++) a(idx,k,l,m,n,o) = Rand::draw(gen,range); } } @@ -1701,13 +1701,13 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) - for(IndexType n=0;n(a.dimension_4());n++) - for(IndexType o=0;o(a.dimension_5());o++) - for(IndexType p=0;p(a.dimension_6());p++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) + for(IndexType n=0;n(a.extent(4));n++) + for(IndexType o=0;o(a.extent(5));o++) + for(IndexType p=0;p(a.extent(6));p++) a(idx,k,l,m,n,o,p) = Rand::draw(gen,range); } } @@ -1733,14 +1733,14 @@ struct fill_random_functor_range{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) - for(IndexType n=0;n(a.dimension_4());n++) - for(IndexType o=0;o(a.dimension_5());o++) - for(IndexType p=0;p(a.dimension_6());p++) - for(IndexType q=0;q(a.dimension_7());q++) + 
if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) + for(IndexType n=0;n(a.extent(4));n++) + for(IndexType o=0;o(a.extent(5));o++) + for(IndexType p=0;p(a.extent(6));p++) + for(IndexType q=0;q(a.extent(7));q++) a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range); } } @@ -1765,7 +1765,7 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) + if(idx(a.extent(0))) a(idx) = Rand::draw(gen,begin,end); } rand_pool.free_state(gen); @@ -1790,8 +1790,8 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) a(idx,k) = Rand::draw(gen,begin,end); } } @@ -1818,9 +1818,9 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) a(idx,k,l) = Rand::draw(gen,begin,end); } } @@ -1846,10 +1846,10 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) a(idx,k,l,m) = Rand::draw(gen,begin,end); } } @@ -1875,11 +1875,11 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())){ - for(IndexType 
l=0;l(a.dimension_1());l++) - for(IndexType m=0;m(a.dimension_2());m++) - for(IndexType n=0;n(a.dimension_3());n++) - for(IndexType o=0;o(a.dimension_4());o++) + if(idx(a.extent(0))){ + for(IndexType l=0;l(a.extent(1));l++) + for(IndexType m=0;m(a.extent(2));m++) + for(IndexType n=0;n(a.extent(3));n++) + for(IndexType o=0;o(a.extent(4));o++) a(idx,l,m,n,o) = Rand::draw(gen,begin,end); } } @@ -1905,12 +1905,12 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) - for(IndexType n=0;n(a.dimension_4());n++) - for(IndexType o=0;o(a.dimension_5());o++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) + for(IndexType n=0;n(a.extent(4));n++) + for(IndexType o=0;o(a.extent(5));o++) a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end); } } @@ -1937,13 +1937,13 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) - for(IndexType n=0;n(a.dimension_4());n++) - for(IndexType o=0;o(a.dimension_5());o++) - for(IndexType p=0;p(a.dimension_6());p++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) + for(IndexType n=0;n(a.extent(4));n++) + for(IndexType o=0;o(a.extent(5));o++) + for(IndexType p=0;p(a.extent(6));p++) a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end); } } @@ -1969,14 +1969,14 @@ struct fill_random_functor_begin_end{ typename RandomPool::generator_type gen = rand_pool.get_state(); for(IndexType j=0;j(a.dimension_0())) { - for(IndexType 
k=0;k(a.dimension_1());k++) - for(IndexType l=0;l(a.dimension_2());l++) - for(IndexType m=0;m(a.dimension_3());m++) - for(IndexType n=0;n(a.dimension_4());n++) - for(IndexType o=0;o(a.dimension_5());o++) - for(IndexType p=0;p(a.dimension_6());p++) - for(IndexType q=0;q(a.dimension_7());q++) + if(idx(a.extent(0))) { + for(IndexType k=0;k(a.extent(1));k++) + for(IndexType l=0;l(a.extent(2));l++) + for(IndexType m=0;m(a.extent(3));m++) + for(IndexType n=0;n(a.extent(4));n++) + for(IndexType o=0;o(a.extent(5));o++) + for(IndexType p=0;p(a.extent(6));p++) + for(IndexType q=0;q(a.extent(7));q++) a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end); } } @@ -1988,14 +1988,14 @@ struct fill_random_functor_begin_end{ template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { - int64_t LDA = a.dimension_0(); + int64_t LDA = a.extent(0); if(LDA>0) parallel_for((LDA+127)/128,Impl::fill_random_functor_range(a,g,range)); } template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) { - int64_t LDA = a.dimension_0(); + int64_t LDA = a.extent(0); if(LDA>0) parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end(a,g,begin,end)); } diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 237de751fe..888476045b 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -120,7 +120,6 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - // printf("copy: dst(%i) src(%i)\n",i+dst_offset,i); copy_op::copy(dst_values,i+dst_offset,src_values,i); } }; @@ -151,20 +150,22 @@ public: DstViewType dst_values ; perm_view_type sort_order ; src_view_type src_values ; + int src_offset ; copy_permute_functor( DstViewType const & dst_values_ , PermuteViewType const & sort_order_ , SrcViewType const & src_values_ + , int const & src_offset_ ) : dst_values( dst_values_ ) , sort_order( sort_order_ ) , src_values( src_values_ ) + , src_offset( src_offset_ ) {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - // printf("copy_permute: dst(%i) src(%i)\n",i,sort_order(i)); - copy_op::copy(dst_values,i,src_values,sort_order(i)); + copy_op::copy(dst_values,i,src_values,src_offset+sort_order(i)); } }; @@ -259,19 +260,21 @@ public: // Create the permutation vector, the bin_offset array and the bin_count array. 
Can be called again if keys changed void create_permute_vector() { const size_t len = range_end - range_begin ; - Kokkos::parallel_for (Kokkos::RangePolicy (0,len),*this); - Kokkos::parallel_scan(Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); + Kokkos::parallel_for ("Kokkos::Sort::BinCount",Kokkos::RangePolicy (0,len),*this); + Kokkos::parallel_scan("Kokkos::Sort::BinOffset",Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); Kokkos::deep_copy(bin_count_atomic,0); - Kokkos::parallel_for (Kokkos::RangePolicy (0,len),*this); + Kokkos::parallel_for ("Kokkos::Sort::BinBinning",Kokkos::RangePolicy (0,len),*this); if(sort_within_bins) - Kokkos::parallel_for (Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); + Kokkos::parallel_for ("Kokkos::Sort::BinSort",Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); } - // Sort a view with respect ot the first dimension using the permutation array + // Sort a subset of a view with respect to the first dimension using the permutation array template - void sort( ValuesViewType const & values) + void sort( ValuesViewType const & values + , int values_range_begin + , int values_range_end) const { typedef Kokkos::View< typename ValuesViewType::data_type, @@ -280,6 +283,10 @@ public: scratch_view_type ; const size_t len = range_end - range_begin ; + const size_t values_len = values_range_end - values_range_begin ; + if (len != values_len) { + Kokkos::abort("BinSort::sort: values range length != permutation vector length"); + } scratch_view_type sorted_values("Scratch", @@ -297,19 +304,25 @@ public: , offset_type /* PermuteViewType */ , ValuesViewType /* SrcViewType */ > - functor( sorted_values , sort_order , values ); + functor( sorted_values , sort_order , values, values_range_begin - range_begin ); - parallel_for( Kokkos::RangePolicy(0,len),functor); + parallel_for("Kokkos::Sort::CopyPermute", Kokkos::RangePolicy(0,len),functor); } { copy_functor< ValuesViewType , scratch_view_type > functor( values , range_begin , sorted_values ); - 
parallel_for( Kokkos::RangePolicy(0,len),functor); + parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy(0,len),functor); } } + template + void sort( ValuesViewType const & values ) const + { + this->sort( values, 0, /*values.extent(0)*/ range_end - range_begin ); + } + // Get the permutation vector KOKKOS_INLINE_FUNCTION offset_type get_permute_vector() const { return sort_order;} @@ -327,7 +340,7 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const bin_count_tag& tag, const int& i) const { const int j = range_begin + i ; - bin_count_atomic(bin_op.bin(keys,j))++; + bin_count_atomic(bin_op.bin(keys, j))++; } KOKKOS_INLINE_FUNCTION @@ -512,7 +525,7 @@ void sort( ViewType const & view , bool const always_use_kokkos_sort = false) Kokkos::Experimental::MinMaxScalar result; Kokkos::Experimental::MinMax reducer(result); - parallel_reduce(Kokkos::RangePolicy(0,view.extent(0)), + parallel_reduce("Kokkos::Sort::FindExtent",Kokkos::RangePolicy(0,view.extent(0)), Impl::min_max_functor(view),reducer); if(result.min_val == result.max_val) return; BinSort bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true); @@ -532,7 +545,7 @@ void sort( ViewType view Kokkos::Experimental::MinMaxScalar result; Kokkos::Experimental::MinMax reducer(result); - parallel_reduce( range_policy( begin , end ) + parallel_reduce("Kokkos::Sort::FindExtent", range_policy( begin , end ) , Impl::min_max_functor(view),reducer ); if(result.min_val == result.max_val) return; @@ -541,8 +554,9 @@ void sort( ViewType view bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true); bin_sort.create_permute_vector(); - bin_sort.sort(view); + bin_sort.sort(view,begin,end); } + } #endif diff --git a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp index 710eeb8ada..86fdccd0e7 100644 --- a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR 
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -61,14 +61,9 @@ class cuda : public ::testing::Test { protected: static void SetUpTestCase() { - std::cout << std::setprecision(5) << std::scientific; - Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); } static void TearDownTestCase() { - Kokkos::Cuda::finalize(); - Kokkos::HostSpace::execution_space::finalize(); } }; diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp index c2c118ce1a..c4ddde7b7f 100644 --- a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -60,25 +60,10 @@ protected: static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; - - int threads_count = 0; - #pragma omp parallel - { - #pragma omp atomic - ++threads_count; - } - - if (threads_count > 3) { - threads_count /= 2; - } - - Kokkos::OpenMP::initialize( threads_count ); - Kokkos::OpenMP::print_configuration( std::cout ); } static void TearDownTestCase() { - Kokkos::OpenMP::finalize(); } }; diff --git a/lib/kokkos/algorithms/unit_tests/TestROCm.cpp b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp index 720b377ed2..15179509bb 100644 --- a/lib/kokkos/algorithms/unit_tests/TestROCm.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -62,13 +62,9 @@ protected: static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; - Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) ); } static void TearDownTestCase() { - Kokkos::Experimental::ROCm::finalize(); - Kokkos::HostSpace::execution_space::finalize(); } }; diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index 2771f1793d..73bd416f2a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -34,7 +34,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp index a1df93e07b..9cf998f773 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -62,13 +62,10 @@ class serial : public ::testing::Test { protected: static void SetUpTestCase() { - std::cout << std::setprecision (5) << std::scientific; - Kokkos::Serial::initialize (); } static void TearDownTestCase () { - Kokkos::Serial::finalize (); } }; diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 2cb0b89712..e0c646c199 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -34,7 +34,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -171,10 +171,10 @@ void test_3D_sort(unsigned int n) { double sum_after = 0.0; unsigned int sort_fails = 0; - Kokkos::parallel_reduce(keys.dimension_0(),sum3D(keys),sum_before); + Kokkos::parallel_reduce(keys.extent(0),sum3D(keys),sum_before); int bin_1d = 1; - while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2; + while( bin_1d*bin_1d*bin_1d*4< (int) keys.extent(0) ) bin_1d*=2; int bin_max[3] = {bin_1d,bin_1d,bin_1d}; typename KeyViewType::value_type min[3] = {0,0,0}; typename KeyViewType::value_type max[3] = {100,100,100}; @@ -186,8 +186,8 @@ void test_3D_sort(unsigned int n) { Sorter.create_permute_vector(); Sorter.template sort< KeyViewType >(keys); - Kokkos::parallel_reduce(keys.dimension_0(),sum3D(keys),sum_after); - Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct(keys,bin_1d,min[0],max[0]),sort_fails); + Kokkos::parallel_reduce(keys.extent(0),sum3D(keys),sum_after); + Kokkos::parallel_reduce(keys.extent(0)-1,bin3d_is_sorted_struct(keys,bin_1d,min[0],max[0]),sort_fails); double ratio = sum_before/sum_after; double epsilon = 1e-10; @@ -205,24 +205,13 @@ void test_3D_sort(unsigned int n) { template void test_dynamic_view_sort(unsigned int n ) { - typedef typename ExecutionSpace::memory_space memory_space ; typedef Kokkos::Experimental::DynamicView KeyDynamicViewType; typedef Kokkos::View KeyViewType; const size_t upper_bound = 2 * n ; + const size_t min_chunk_size = 1024; - const size_t total_alloc_size = n * sizeof(KeyType) * 1.2 ; - const size_t superblock_size = std::min(total_alloc_size, size_t(1000000)); - - typename KeyDynamicViewType::memory_pool - pool( memory_space() - , n * sizeof(KeyType) * 1.2 - , 500 /* min block size in bytes */ - , 30000 /* max block size in bytes */ - , superblock_size - ); - - KeyDynamicViewType keys("Keys",pool,upper_bound); + KeyDynamicViewType keys("Keys", min_chunk_size, 
upper_bound); keys.resize_serial(n); @@ -230,13 +219,15 @@ void test_dynamic_view_sort(unsigned int n ) // Test sorting array with all numbers equal Kokkos::deep_copy(keys_view,KeyType(1)); - Kokkos::Experimental::deep_copy(keys,keys_view); + Kokkos::deep_copy(keys,keys_view); Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); Kokkos::Random_XorShift64_Pool g(1931); Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); - Kokkos::Experimental::deep_copy(keys,keys_view); + ExecutionSpace::fence(); + Kokkos::deep_copy(keys,keys_view); + //ExecutionSpace::fence(); double sum_before = 0.0; double sum_after = 0.0; @@ -246,7 +237,9 @@ void test_dynamic_view_sort(unsigned int n ) Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); - Kokkos::Experimental::deep_copy( keys_view , keys ); + ExecutionSpace::fence(); // Need this fence to prevent BusError with Cuda + Kokkos::deep_copy( keys_view , keys ); + //ExecutionSpace::fence(); Kokkos::parallel_reduce(n,sum(keys_view),sum_after); Kokkos::parallel_reduce(n-1,is_sorted_struct(keys_view),sort_fails); @@ -269,6 +262,74 @@ void test_dynamic_view_sort(unsigned int n ) //---------------------------------------------------------------------------- +template +void test_issue_1160() +{ + Kokkos::View element_("element", 10); + Kokkos::View x_("x", 10); + Kokkos::View v_("y", 10); + + auto h_element = Kokkos::create_mirror_view(element_); + auto h_x = Kokkos::create_mirror_view(x_); + auto h_v = Kokkos::create_mirror_view(v_); + + h_element(0) = 9; + h_element(1) = 8; + h_element(2) = 7; + h_element(3) = 6; + h_element(4) = 5; + h_element(5) = 4; + h_element(6) = 3; + h_element(7) = 2; + h_element(8) = 1; + h_element(9) = 0; + + for (int i = 0; i < 10; ++i) { + h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i)); + } + Kokkos::deep_copy(element_, h_element); + Kokkos::deep_copy(x_, h_x); + Kokkos::deep_copy(v_, h_v); + + typedef decltype(element_) KeyViewType; + typedef Kokkos::BinOp1D< 
KeyViewType > BinOp; + + int begin = 3; + int end = 8; + auto max = h_element(begin); + auto min = h_element(end - 1); + BinOp binner(end - begin, min, max); + + Kokkos::BinSort Sorter(element_,begin,end,binner,false); + Sorter.create_permute_vector(); + Sorter.sort(element_,begin,end); + + Sorter.sort(x_,begin,end); + Sorter.sort(v_,begin,end); + + Kokkos::deep_copy(h_element, element_); + Kokkos::deep_copy(h_x, x_); + Kokkos::deep_copy(h_v, v_); + + ASSERT_EQ(h_element(0), 9); + ASSERT_EQ(h_element(1), 8); + ASSERT_EQ(h_element(2), 7); + ASSERT_EQ(h_element(3), 2); + ASSERT_EQ(h_element(4), 3); + ASSERT_EQ(h_element(5), 4); + ASSERT_EQ(h_element(6), 5); + ASSERT_EQ(h_element(7), 6); + ASSERT_EQ(h_element(8), 1); + ASSERT_EQ(h_element(9), 0); + + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(h_element(i), int(h_x.access(i, 0))); + ASSERT_EQ(h_element(i), int(h_v.access(i, 0))); + } +} + +//---------------------------------------------------------------------------- + template void test_sort(unsigned int N) { @@ -278,6 +339,7 @@ void test_sort(unsigned int N) test_3D_sort(N); test_dynamic_view_sort(N*N); #endif + test_issue_1160(); } } diff --git a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp index 08749779ff..99cdb7da92 100644 --- a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -63,25 +63,10 @@ protected: static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; - - unsigned num_threads = 4; - - if (Kokkos::hwloc::available()) { - num_threads = Kokkos::hwloc::get_available_numa_count() - * Kokkos::hwloc::get_available_cores_per_numa() - // * Kokkos::hwloc::get_available_threads_per_core() - ; - - } - - std::cout << "Threads: " << num_threads << std::endl; - - Kokkos::Threads::initialize( num_threads ); } static void TearDownTestCase() { - Kokkos::Threads::finalize(); } }; diff --git a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp index 9e75b580bc..8feb08332f 100644 --- a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp +++ b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp @@ -35,16 +35,20 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #include +#include int main(int argc, char *argv[]) { + Kokkos::initialize(argc,argv); ::testing::InitGoogleTest(&argc,argv); - return RUN_ALL_TESTS(); + int result = RUN_ALL_TESTS(); + Kokkos::finalize(); + return result; } diff --git a/lib/kokkos/benchmarks/atomic/Makefile b/lib/kokkos/benchmarks/atomic/Makefile index 41875ee5e4..64b43917de 100644 --- a/lib/kokkos/benchmarks/atomic/Makefile +++ b/lib/kokkos/benchmarks/atomic/Makefile @@ -10,7 +10,7 @@ default: build ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper EXE = ${EXE_NAME}.cuda KOKKOS_CUDA_OPTIONS = "enable_lambda" else diff --git a/lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash b/lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash index 63aaca9e40..9dded535e8 100755 --- a/lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash +++ b/lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash @@ -3,7 +3,7 @@ # BytesAndFlops cd build/bytes_and_flops -USE_CUDA=`grep "_CUDA 1" KokkosCore_config.h | wc -l` +USE_CUDA=`grep "_CUDA" KokkosCore_config.h | wc -l` if [[ ${USE_CUDA} > 0 ]]; then BAF_EXE=bytes_and_flops.cuda @@ -41,4 +41,4 @@ cd ../.. 
echo "MiniFE: ${FE_PERF_1} ${FE_PERF_2}" PERF_RESULT=`echo "${BAF_PERF_1} ${BAF_PERF_2} ${MD_PERF_1} ${MD_PERF_2} ${FE_PERF_1} ${FE_PERF_2}" | awk '{print ($1+$2+$3+$4+$5+$6)/6}'` -echo "Total Result: " ${PERF_RESULT} \ No newline at end of file +echo "Total Result: " ${PERF_RESULT} diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp index e3fe42a652..59b4d50c44 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp index b60ec84994..6509c654e7 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp index 0992c5b54b..c6651da1e7 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp index 8db5ce0eb5..4f46b38717 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/gather/gather.hpp b/lib/kokkos/benchmarks/gather/gather.hpp index 406bd28983..bbbd65850f 100644 --- a/lib/kokkos/benchmarks/gather/gather.hpp +++ b/lib/kokkos/benchmarks/gather/gather.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/gather/gather_unroll.hpp b/lib/kokkos/benchmarks/gather/gather_unroll.hpp index 1d01b26ca7..1d9c99adf9 100644 --- a/lib/kokkos/benchmarks/gather/gather_unroll.hpp +++ b/lib/kokkos/benchmarks/gather/gather_unroll.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/gather/main.cpp b/lib/kokkos/benchmarks/gather/main.cpp index 88eb0493c1..ca5238e7fd 100644 --- a/lib/kokkos/benchmarks/gather/main.cpp +++ b/lib/kokkos/benchmarks/gather/main.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/policy_performance/main.cpp b/lib/kokkos/benchmarks/policy_performance/main.cpp index b0ed9bb512..2f5395734a 100644 --- a/lib/kokkos/benchmarks/policy_performance/main.cpp +++ b/lib/kokkos/benchmarks/policy_performance/main.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp index 11576413e2..1ab437928d 100644 --- a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp +++ b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/cmake/Modules/FindHWLOC.cmake b/lib/kokkos/cmake/Modules/FindHWLOC.cmake index 273dcb5c8a..60df8084d8 100644 --- a/lib/kokkos/cmake/Modules/FindHWLOC.cmake +++ b/lib/kokkos/cmake/Modules/FindHWLOC.cmake @@ -2,7 +2,7 @@ # FindHWLOC # ---------- # -# Try to find HWLOC. 
+# Try to find HWLOC, based on KOKKOS_HWLOC_DIR # # The following variables are defined: # @@ -10,8 +10,8 @@ # HWLOC_INCLUDE_DIR - HWLOC include directory # HWLOC_LIBRARIES - Libraries needed to use HWLOC -find_path(HWLOC_INCLUDE_DIR hwloc.h) -find_library(HWLOC_LIBRARIES hwloc) +find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include") +find_library(HWLOC_LIBRARIES hwloc PATHS "${KOKKOS_HWLOC_DIR}/lib") include(FindPackageHandleStandardArgs) find_package_handle_standard_args(HWLOC DEFAULT_MSG diff --git a/lib/kokkos/cmake/kokkos_build.cmake b/lib/kokkos/cmake/kokkos_build.cmake index f31680d6e2..76d0655adb 100644 --- a/lib/kokkos/cmake/kokkos_build.cmake +++ b/lib/kokkos/cmake/kokkos_build.cmake @@ -1,7 +1,3 @@ -# kokkos_generated_settings.cmake includes the kokkos library itself in KOKKOS_LIBS -# which we do not want to use for the cmake builds so clean this up -string(REGEX REPLACE "-lkokkos" "" KOKKOS_LIBS ${KOKKOS_LIBS}) - ############################ Detect if submodule ############################### # # With thanks to StackOverflow: @@ -73,6 +69,19 @@ IF(KOKKOS_SEPARATE_LIBS) PUBLIC $<$:${KOKKOS_CXX_FLAGS}> ) + target_include_directories( + kokkoscore + PUBLIC + ${KOKKOS_TPL_INCLUDE_DIRS} + ) + + foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) + find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) + target_link_libraries(kokkoscore PUBLIC ${LIB_${lib}}) + endforeach() + + target_link_libraries(kokkoscore PUBLIC "${KOKKOS_LINK_FLAGS}") + # Install the kokkoscore library INSTALL (TARGETS kokkoscore EXPORT KokkosTargets @@ -81,12 +90,6 @@ IF(KOKKOS_SEPARATE_LIBS) RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin ) - TARGET_LINK_LIBRARIES( - kokkoscore - ${KOKKOS_LD_FLAGS} - ${KOKKOS_EXTRA_LIBS_LIST} - ) - # kokkoscontainers if (DEFINED KOKKOS_CONTAINERS_SRCS) ADD_LIBRARY( @@ -144,12 +147,19 @@ ELSE() PUBLIC $<$:${KOKKOS_CXX_FLAGS}> ) - TARGET_LINK_LIBRARIES( + target_include_directories( kokkos - ${KOKKOS_LD_FLAGS} - 
${KOKKOS_EXTRA_LIBS_LIST} + PUBLIC + ${KOKKOS_TPL_INCLUDE_DIRS} ) + foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) + find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) + target_link_libraries(kokkos PUBLIC ${LIB_${lib}}) + endforeach() + + target_link_libraries(kokkos PUBLIC "${KOKKOS_LINK_FLAGS}") + # Install the kokkos library INSTALL (TARGETS kokkos EXPORT KokkosTargets diff --git a/lib/kokkos/cmake/kokkos_options.cmake b/lib/kokkos/cmake/kokkos_options.cmake index f17710a4ce..25eb8e86ce 100644 --- a/lib/kokkos/cmake/kokkos_options.cmake +++ b/lib/kokkos/cmake/kokkos_options.cmake @@ -25,11 +25,12 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST Cuda_LDG_Intrinsic Debug Debug_DualView_Modify_Check - Debug_Bounds_Checkt + Debug_Bounds_Check Compiler_Warnings Profiling Profiling_Load_Print Aggressive_Vectorization + Deprecated_Code ) #------------------------------------------------------------------------------- @@ -263,7 +264,8 @@ set(KOKKOS_ENABLE_PROFILING ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT} CACHE BO set_kokkos_default_default(PROFILING_LOAD_PRINT OFF) set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT} CACHE BOOL "Enable profile load print.") - +set_kokkos_default_default(DEPRECATED_CODE ON) +set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.") #------------------------------------------------------------------------------- diff --git a/lib/kokkos/cmake/kokkos_settings.cmake b/lib/kokkos/cmake/kokkos_settings.cmake index 850a74a670..579fab0c95 100644 --- a/lib/kokkos/cmake/kokkos_settings.cmake +++ b/lib/kokkos/cmake/kokkos_settings.cmake @@ -14,6 +14,13 @@ #------------------------------------------------------------------------------- # Ensure that KOKKOS_ARCH is in the ARCH_LIST +if (KOKKOS_ARCH MATCHES ",") + message("-- Detected a comma in: KOKKOS_ARCH=${KOKKOS_ARCH}") + message("-- Although we prefer KOKKOS_ARCH to be 
semicolon-delimited, we do allow") + message("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)") + string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}") + message("-- Commas were changed to semicolons, now KOKKOS_ARCH=${KOKKOS_ARCH}") +endif() foreach(arch ${KOKKOS_ARCH}) list(FIND KOKKOS_ARCH_LIST ${arch} indx) if (indx EQUAL -1) @@ -23,14 +30,13 @@ foreach(arch ${KOKKOS_ARCH}) endforeach() # KOKKOS_SETTINGS uses KOKKOS_ARCH -string(REPLACE ";" "," KOKKOS_ARCH "${KOKKOS_ARCH}") -set(KOKKOS_ARCH ${KOKKOS_ARCH}) +string(REPLACE ";" "," KOKKOS_GMAKE_ARCH "${KOKKOS_ARCH}") # From Makefile.kokkos: Options: yes,no if(${KOKKOS_ENABLE_DEBUG}) - set(KOKKOS_DEBUG yes) + set(KOKKOS_GMAKE_DEBUG yes) else() - set(KOKKOS_DEBUG no) + set(KOKKOS_GMAKE_DEBUG no) endif() #------------------------------- KOKKOS_DEVICES -------------------------------- @@ -43,10 +49,10 @@ foreach(devopt ${KOKKOS_DEVICES_LIST}) endif () endforeach() # List needs to be comma-delmitted -string(REPLACE ";" "," KOKKOS_DEVICES "${KOKKOS_DEVICESl}") +string(REPLACE ";" "," KOKKOS_GMAKE_DEVICES "${KOKKOS_DEVICESl}") #------------------------------- KOKKOS_OPTIONS -------------------------------- -# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling +# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling,disable_deprecated_code #compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print set(KOKKOS_OPTIONSl) @@ -57,7 +63,10 @@ if(${KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION}) list(APPEND KOKKOS_OPTIONSl aggressive_vectorization) endif() if(NOT ${KOKKOS_ENABLE_PROFILING}) - list(APPEND KOKKOS_OPTIONSl disable_vectorization) + list(APPEND KOKKOS_OPTIONSl disable_profiling) +endif() +if(NOT ${KOKKOS_ENABLE_DEPRECATED_CODE}) + list(APPEND KOKKOS_OPTIONSl disable_deprecated_code) endif() if(NOT ${KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK}) list(APPEND 
KOKKOS_OPTIONSl disable_dualview_modify_check) @@ -66,7 +75,7 @@ if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT}) list(APPEND KOKKOS_OPTIONSl enable_profile_load_print) endif() # List needs to be comma-delimitted -string(REPLACE ";" "," KOKKOS_OPTIONS "${KOKKOS_OPTIONSl}") +string(REPLACE ";" "," KOKKOS_GMAKE_OPTIONS "${KOKKOS_OPTIONSl}") #------------------------------- KOKKOS_USE_TPLS ------------------------------- @@ -78,19 +87,19 @@ foreach(tplopt ${KOKKOS_USE_TPLS_LIST}) endif () endforeach() # List needs to be comma-delimitted -string(REPLACE ";" "," KOKKOS_USE_TPLS "${KOKKOS_USE_TPLSl}") +string(REPLACE ";" "," KOKKOS_GMAKE_USE_TPLS "${KOKKOS_USE_TPLSl}") #------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- # Construct the Makefile options -set(KOKKOS_CUDA_OPTIONS) +set(KOKKOS_CUDA_OPTIONSl) foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST}) if (${KOKKOS_ENABLE_CUDA_${cudaopt}}) list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}}) endif () endforeach() # List needs to be comma-delmitted -string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}") +string(REPLACE ";" "," KOKKOS_GMAKE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}") #------------------------------- PATH VARIABLES -------------------------------- # Want makefile to use same executables specified which means modifying @@ -100,10 +109,10 @@ string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}") set(KOKKOS_INTERNAL_PATHS) set(addpathl) -foreach(kvar "CUDA;QTHREADS;${KOKKOS_USE_TPLS_LIST}") +foreach(kvar IN LISTS KOKKOS_USE_TPLS_LIST ITEMS CUDA QTHREADS) if(${KOKKOS_ENABLE_${kvar}}) if(DEFINED KOKKOS_${kvar}_DIR) - set(KOKKOS_INTERNAL_PATHS "${KOKKOS_INTERNAL_PATHS} ${kvar}_PATH=${KOKKOS_${kvar}_DIR}") + set(KOKKOS_INTERNAL_PATHS ${KOKKOS_INTERNAL_PATHS} "${kvar}_PATH=${KOKKOS_${kvar}_DIR}") if(IS_DIRECTORY ${KOKKOS_${kvar}_DIR}/bin) list(APPEND addpathl ${KOKKOS_${kvar}_DIR}/bin) endif() @@ -124,10 +133,9 @@ set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} 
KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFI # Form of KOKKOS_foo=$KOKKOS_foo foreach(kvar ARCH;DEVICES;DEBUG;OPTIONS;CUDA_OPTIONS;USE_TPLS) - set(KOKKOS_VAR KOKKOS_${kvar}) - if(DEFINED KOKKOS_${kvar}) - if (NOT "${${KOKKOS_VAR}}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_VAR}=${${KOKKOS_VAR}}) + if(DEFINED KOKKOS_GMAKE_${kvar}) + if (NOT "${KOKKOS_GMAKE_${kvar}}" STREQUAL "") + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_${kvar}=${KOKKOS_GMAKE_${kvar}}) endif() endif() endforeach() @@ -147,7 +155,7 @@ if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "") set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS}) endif() if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} PATH=${KOKKOS_INTERNAL_ADDTOPATH}:\${PATH}) + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"") endif() # Final form that gets passed to make @@ -185,7 +193,7 @@ if(KOKKOS_CMAKE_VERBOSE) message(STATUS "") message(STATUS "Architectures:") - message(STATUS " ${KOKKOS_ARCH}") + message(STATUS " ${KOKKOS_GMAKE_ARCH}") message(STATUS "") message(STATUS "Enabled options") @@ -194,43 +202,14 @@ if(KOKKOS_CMAKE_VERBOSE) message(STATUS " KOKKOS_SEPARATE_LIBS") endif() - if(KOKKOS_ENABLE_HWLOC) - message(STATUS " KOKKOS_ENABLE_HWLOC") - endif() - - if(KOKKOS_ENABLE_MEMKIND) - message(STATUS " KOKKOS_ENABLE_MEMKIND") - endif() - - if(KOKKOS_ENABLE_DEBUG) - message(STATUS " KOKKOS_ENABLE_DEBUG") - endif() - - if(KOKKOS_ENABLE_PROFILING) - message(STATUS " KOKKOS_ENABLE_PROFILING") - endif() - - if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) - message(STATUS " KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION") - endif() + foreach(opt IN LISTS KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST) + string(TOUPPER ${opt} OPT) + if (KOKKOS_ENABLE_${OPT}) + message(STATUS " KOKKOS_ENABLE_${OPT}") + endif() + endforeach() if(KOKKOS_ENABLE_CUDA) - if(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC) - message(STATUS " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") - 
endif() - - if(KOKKOS_ENABLE_CUDA_UVM) - message(STATUS " KOKKOS_ENABLE_CUDA_UVM") - endif() - - if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) - message(STATUS " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") - endif() - - if(KOKKOS_ENABLE_CUDA_LAMBDA) - message(STATUS " KOKKOS_ENABLE_CUDA_LAMBDA") - endif() - if(KOKKOS_CUDA_DIR) message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}") endif() diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake index 321704a1c8..1b5a7b2adb 100644 --- a/lib/kokkos/cmake/tribits.cmake +++ b/lib/kokkos/cmake/tribits.cmake @@ -3,7 +3,7 @@ INCLUDE(CTest) cmake_policy(SET CMP0054 NEW) -MESSAGE(WARNING "The project name is: ${PROJECT_NAME}") +MESSAGE(STATUS "The project name is: ${PROJECT_NAME}") IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) @@ -84,9 +84,6 @@ ENDFUNCTION() MACRO(TRIBITS_ADD_TEST_DIRECTORIES) - message(STATUS "ProjectName: " ${PROJECT_NAME}) - message(STATUS "Tests: " ${${PROJECT_NAME}_ENABLE_TESTS}) - IF(${${PROJECT_NAME}_ENABLE_TESTS}) FOREACH(TEST_DIR ${ARGN}) ADD_SUBDIRECTORY(${TEST_DIR}) @@ -95,13 +92,11 @@ MACRO(TRIBITS_ADD_TEST_DIRECTORIES) ENDMACRO() MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES) - IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) FOREACH(EXAMPLE_DIR ${ARGN}) ADD_SUBDIRECTORY(${EXAMPLE_DIR}) ENDFOREACH() ENDIF() - ENDMACRO() diff --git a/lib/kokkos/config/configure_compton_cpu.sh b/lib/kokkos/config/configure_compton_cpu.sh deleted file mode 100644 index 17287fb848..0000000000 --- a/lib/kokkos/config/configure_compton_cpu.sh +++ /dev/null @@ -1,190 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. -# -# Additional command-line arguments given to this script will be -# passed directly to CMake. -# - -# -# Force CMake to re-evaluate build options. 
-# -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -#----------------------------------------------------------------------------- -# Incrementally construct cmake configure options: - -CMAKE_CONFIGURE="" - -#----------------------------------------------------------------------------- -# Location of Trilinos source tree: - -CMAKE_PROJECT_DIR="${HOME}/Trilinos" - -# Location for installation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`" - -#----------------------------------------------------------------------------- -# General build options. -# Use a variable so options can be propagated to CUDA compiler. - -CMAKE_VERBOSE_MAKEFILE=OFF -CMAKE_BUILD_TYPE=RELEASE -# CMAKE_BUILD_TYPE=DEBUG - -#----------------------------------------------------------------------------- -# Build for CUDA architecture: - -CUDA_ARCH="" -# CUDA_ARCH="20" -# CUDA_ARCH="30" -# CUDA_ARCH="35" - -# Build with Intel compiler - -INTEL=ON - -# Build for MIC architecture: - -# INTEL_XEON_PHI=ON - -# Build with HWLOC at location: - -HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2" - -# Location for MPI to use in examples: - -MPI_BASE_DIR="" - -#----------------------------------------------------------------------------- -# MPI configuation only used for examples: -# -# Must have the MPI_BASE_DIR so that the -# include path can be passed to the Cuda compiler - -if [ -n "${MPI_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# Pthread configuation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" - 
-#----------------------------------------------------------------------------- -# OpenMP configuation: - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Configure packages for kokkos-only: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Hardware locality cmake configuration: - -if [ -n "${HWLOC_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" -fi - -#----------------------------------------------------------------------------- -# Cuda cmake configuration: - -if [ -n "${CUDA_ARCH}" ] ; -then - - # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, - # this is different than the standard CMAKE_CXX_FLAGS syntax. 
- - CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" - - if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" - fi - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" - -fi - -#----------------------------------------------------------------------------- - -if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" -fi - -#----------------------------------------------------------------------------- - -# Cross-compile for Intel Xeon Phi: - -if [ "${INTEL_XEON_PHI}" = "ON" ] ; -then - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" - 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" - - # Cannot cross-compile fortran compatibility checks on the MIC: - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" - - # Tell cmake the answers to compile-and-execute tests - # to prevent cmake from executing a cross-compiled program. - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" - -fi - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" - -#----------------------------------------------------------------------------- - -echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" - -cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/configure_compton_mic.sh b/lib/kokkos/config/configure_compton_mic.sh deleted file mode 100644 index 7f9aee13f9..0000000000 --- a/lib/kokkos/config/configure_compton_mic.sh +++ /dev/null @@ -1,186 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. -# -# Additional command-line arguments given to this script will be -# passed directly to CMake. -# - -# -# Force CMake to re-evaluate build options. 
-# -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -#----------------------------------------------------------------------------- -# Incrementally construct cmake configure options: - -CMAKE_CONFIGURE="" - -#----------------------------------------------------------------------------- -# Location of Trilinos source tree: - -CMAKE_PROJECT_DIR="${HOME}/Trilinos" - -# Location for installation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/`date +%F`" - -#----------------------------------------------------------------------------- -# General build options. -# Use a variable so options can be propagated to CUDA compiler. - -CMAKE_VERBOSE_MAKEFILE=OFF -CMAKE_BUILD_TYPE=RELEASE -# CMAKE_BUILD_TYPE=DEBUG - -#----------------------------------------------------------------------------- -# Build for CUDA architecture: - -CUDA_ARCH="" -# CUDA_ARCH="20" -# CUDA_ARCH="30" -# CUDA_ARCH="35" - -# Build for MIC architecture: - -INTEL_XEON_PHI=ON - -# Build with HWLOC at location: - -HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2" - -# Location for MPI to use in examples: - -MPI_BASE_DIR="" - -#----------------------------------------------------------------------------- -# MPI configuation only used for examples: -# -# Must have the MPI_BASE_DIR so that the -# include path can be passed to the Cuda compiler - -if [ -n "${MPI_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# Pthread configuation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# OpenMP 
configuation: - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Configure packages for kokkos-only: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Hardware locality cmake configuration: - -if [ -n "${HWLOC_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" -fi - -#----------------------------------------------------------------------------- -# Cuda cmake configuration: - -if [ -n "${CUDA_ARCH}" ] ; -then - - # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, - # this is different than the standard CMAKE_CXX_FLAGS syntax. 
- - CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" - - if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" - fi - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" - -fi - -#----------------------------------------------------------------------------- - -if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" -fi - -#----------------------------------------------------------------------------- - -# Cross-compile for Intel Xeon Phi: - -if [ "${INTEL_XEON_PHI}" = "ON" ] ; -then - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" - 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" - - # Cannot cross-compile fortran compatibility checks on the MIC: - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" - - # Tell cmake the answers to compile-and-execute tests - # to prevent cmake from executing a cross-compiled program. - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" - -fi - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" - -#----------------------------------------------------------------------------- - -echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" - -cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/configure_kokkos.sh b/lib/kokkos/config/configure_kokkos.sh deleted file mode 100644 index 592e7f5936..0000000000 --- a/lib/kokkos/config/configure_kokkos.sh +++ /dev/null @@ -1,293 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. -# -#----------------------------------------------------------------------------- -# General build options. -# Use a variable so options can be propagated to CUDA compiler. 
- -CMAKE_BUILD_TYPE=RELEASE -# CMAKE_BUILD_TYPE=DEBUG - -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -#----------------------------------------------------------------------------- - -USE_CUDA_ARCH= -USE_THREAD= -USE_OPENMP= -USE_INTEL= -USE_XEON_PHI= -HWLOC_BASE_DIR= -MPI_BASE_DIR= -BLAS_LIB_DIR= -LAPACK_LIB_DIR= - -if [ 1 ] ; then - # Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu - USE_CUDA_ARCH="35" - USE_OPENMP=ON - HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" - MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" - BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" - LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" - -elif [ ] ; then - # Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu - USE_CUDA_ARCH="35" - USE_THREAD=ON - HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" - MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" - BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" - LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" - -elif [ ] ; then - # Platform 'kokkos-dev' with Xeon Phi and hwloc - USE_OPENMP=ON - USE_INTEL=ON - USE_XEON_PHI=ON - HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106" - -elif [ ] ; then - # Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu - USE_CUDA_ARCH="20" - USE_OPENMP=ON - HWLOC_BASE_DIR="/home/sems/common/hwloc/current" - MPI_BASE_DIR="/home/sems/common/openmpi/current" - -elif [ ] ; then - # Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu - USE_CUDA_ARCH="20" - USE_THREAD=ON - HWLOC_BASE_DIR="/home/sems/common/hwloc/current" - MPI_BASE_DIR="/home/sems/common/openmpi/current" - -fi - -#----------------------------------------------------------------------------- -# Incrementally construct cmake configure command line options: - -CMAKE_CONFIGURE="" -CMAKE_CXX_FLAGS="" - -#----------------------------------------------------------------------------- 
-# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- - -if [ 1 ] ; then - - # Configure for Tpetra/Kokkos: - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode" - - CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE" - - if [ -n "${USE_CUDA_ARCH}" ] ; then - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON" - - fi - -fi - -if [ 1 ] ; then - - # Configure for Stokhos: - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON" - -fi - -if [ 1 ] ; then - - # Configure for 
TrilinosCouplings: - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON" - -fi - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON" - -if [ "${CMAKE_BUILD_TYPE}" == "DEBUG" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" -fi - -#----------------------------------------------------------------------------- -# Location for installation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# MPI configuation only used for examples: -# -# Must have the MPI_BASE_DIR so that the -# include path can be passed to the Cuda compiler - -if [ -n "${MPI_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# Kokkos use pthread configuation: - -if [ "${USE_THREAD}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# Kokkos use OpenMP configuation: - -if [ "${USE_OPENMP}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Kokkos_ENABLE_OpenMP:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# Hardware locality configuration: - -if [ -n "${HWLOC_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" -fi - -#----------------------------------------------------------------------------- -# Cuda cmake configuration: - -if [ -n "${USE_CUDA_ARCH}" ] ; -then - - # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, - # this is different than the standard CMAKE_CXX_FLAGS syntax. - - CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}" - - if [ "${USE_OPENMP}" = "ON" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" - fi - - if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" - fi - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" - -fi - -#----------------------------------------------------------------------------- - -if [ "${USE_INTEL}" = "ON" -o "${USE_XEON_PHI}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" -fi - -# Cross-compile for Intel Xeon Phi: - -if [ "${USE_XEON_PHI}" = "ON" ] ; -then - - CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" - 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" - - # Cannot cross-compile fortran compatibility checks on the MIC: - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" - - # Tell cmake the answers to compile-and-execute tests - # to prevent cmake from executing a cross-compiled program. - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" - -fi - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- - -if [ -n "${CMAKE_CXX_FLAGS}" ] ; then - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'" - -fi - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. 
-# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}" - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/configure_kokkos_bgq.sh b/lib/kokkos/config/configure_kokkos_bgq.sh deleted file mode 100755 index 73236937ea..0000000000 --- a/lib/kokkos/config/configure_kokkos_bgq.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. -# -# Additional command-line arguments given to this script will be -# passed directly to CMake. -# - -# to build: -# build on bgq-b[1-12] -# module load sierra-devel -# run this configure file -# make - -# to run: -# ssh bgq-login -# cd /scratch/username/... -# export OMP_PROC_BIND and XLSMPOPTS environment variables -# run with srun - -# Note: hwloc does not work to get or set cpubindings on bgq. -# Use the openmp backend and the openmp environment variables. -# -# Only the mpi wrappers seem to be setup for cross-compile, -# so it is important that this configure enables MPI and uses mpigcc wrappers. - - - -# -# Force CMake to re-evaluate build options. -# -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -#----------------------------------------------------------------------------- -# Incrementally construct cmake configure options: - -CMAKE_CONFIGURE="" - -#----------------------------------------------------------------------------- -# Location of Trilinos source tree: - -CMAKE_PROJECT_DIR="../Trilinos" - -# Location for installation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/`date +%F`" - -#----------------------------------------------------------------------------- -# General build options. -# Use a variable so options can be propagated to CUDA compiler. 
- -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2" - -CMAKE_VERBOSE_MAKEFILE=OFF -CMAKE_BUILD_TYPE=RELEASE -# CMAKE_BUILD_TYPE=DEBUG - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Configure packages for kokkos-only: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" - -#----------------------------------------------------------------------------- - -echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" - -cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/configure_kokkos_dev.sh b/lib/kokkos/config/configure_kokkos_dev.sh deleted file mode 100755 index ac61dec602..0000000000 --- a/lib/kokkos/config/configure_kokkos_dev.sh +++ /dev/null @@ -1,216 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside 
the Trilinos source directory, and -# build there. -# -# Additional command-line arguments given to this script will be -# passed directly to CMake. -# - -# -# Force CMake to re-evaluate build options. -# -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -#----------------------------------------------------------------------------- -# Incrementally construct cmake configure options: - -CMAKE_CONFIGURE="" - -#----------------------------------------------------------------------------- -# Location of Trilinos source tree: - -CMAKE_PROJECT_DIR="${HOME}/Trilinos" - -# Location for installation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/`date +%F`" - -#----------------------------------------------------------------------------- -# General build options. -# Use a variable so options can be propagated to CUDA compiler. - -CMAKE_VERBOSE_MAKEFILE=OFF -CMAKE_BUILD_TYPE=RELEASE -#CMAKE_BUILD_TYPE=DEBUG -#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -#----------------------------------------------------------------------------- -# Build for CUDA architecture: - -#CUDA_ARCH="" -#CUDA_ARCH="20" -#CUDA_ARCH="30" -CUDA_ARCH="35" - -# Build with OpenMP - -OPENMP=ON -PTHREADS=ON - -# Build host code with Intel compiler: - -INTEL=OFF - -# Build for MIC architecture: - -INTEL_XEON_PHI=OFF - -# Build with HWLOC at location: - -#HWLOC_BASE_DIR="" -#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" -HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" - -# Location for MPI to use in examples: - -#MPI_BASE_DIR="" -#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" -MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3" -#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/" - -#----------------------------------------------------------------------------- -# MPI configuation only used for examples: -# -# Must have the MPI_BASE_DIR so that the -# include 
path can be passed to the Cuda compiler - -if [ -n "${MPI_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# Pthread configuation: - -if [ "${PTHREADS}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# OpenMP configuation: - -if [ "${OPENMP}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Configure packages for kokkos-only: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Hardware locality cmake configuration: - -if [ -n "${HWLOC_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" -fi - -#----------------------------------------------------------------------------- -# Cuda cmake configuration: - -if [ -n "${CUDA_ARCH}" ] ; -then - - # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, - # this is different than the standard CMAKE_CXX_FLAGS syntax. - - CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" - - if [ "${OPENMP}" = "ON" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" - fi - - if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" - fi - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" - -fi - -#----------------------------------------------------------------------------- - -if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" -fi - -#----------------------------------------------------------------------------- - -# Cross-compile for Intel Xeon Phi: - -if [ "${INTEL_XEON_PHI}" = "ON" ] ; -then - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" - - # Cannot cross-compile fortran compatibility checks on the MIC: - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" - - # Tell cmake the answers to compile-and-execute tests - # to prevent cmake from executing a cross-compiled program. - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" - -fi - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" - -#----------------------------------------------------------------------------- - -echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" - -cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/configure_kokkos_nvidia.sh b/lib/kokkos/config/configure_kokkos_nvidia.sh deleted file mode 100644 index f78b7dce78..0000000000 --- a/lib/kokkos/config/configure_kokkos_nvidia.sh +++ /dev/null @@ -1,204 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source 
directory, and -# build there. -# -# Additional command-line arguments given to this script will be -# passed directly to CMake. -# - -# -# Force CMake to re-evaluate build options. -# -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -#----------------------------------------------------------------------------- -# Incrementally construct cmake configure options: - -CMAKE_CONFIGURE="" - -#----------------------------------------------------------------------------- -# Location of Trilinos source tree: - -CMAKE_PROJECT_DIR="${HOME}/Trilinos" - -# Location for installation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/`date +%F`" - -#----------------------------------------------------------------------------- -# General build options. -# Use a variable so options can be propagated to CUDA compiler. - -CMAKE_VERBOSE_MAKEFILE=OFF -CMAKE_BUILD_TYPE=RELEASE -# CMAKE_BUILD_TYPE=DEBUG - -#----------------------------------------------------------------------------- -# Build for CUDA architecture: - -# CUDA_ARCH="" -CUDA_ARCH="20" -# CUDA_ARCH="30" -# CUDA_ARCH="35" - -# Build with OpenMP - -OPENMP=ON - -# Build host code with Intel compiler: - -# INTEL=ON - -# Build for MIC architecture: - -# INTEL_XEON_PHI=ON - -# Build with HWLOC at location: - -HWLOC_BASE_DIR="/home/sems/common/hwloc/current" - -# Location for MPI to use in examples: - -MPI_BASE_DIR="/home/sems/common/openmpi/current" - -#----------------------------------------------------------------------------- -# MPI configuation only used for examples: -# -# Must have the MPI_BASE_DIR so that the -# include path can be passed to the Cuda compiler - -if [ -n "${MPI_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" -fi - 
-#----------------------------------------------------------------------------- -# Pthread configuation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# OpenMP configuation: - -if [ "${OPENMP}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Configure packages for kokkos-only: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Hardware locality cmake configuration: - -if [ -n "${HWLOC_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" -fi - -#----------------------------------------------------------------------------- -# Cuda cmake configuration: - -if [ -n "${CUDA_ARCH}" ] ; -then - - # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, - # this is different than the standard CMAKE_CXX_FLAGS syntax. 
- - CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" - - if [ "${OPENMP}" = "ON" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" - fi - - if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" - fi - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" - -fi - -#----------------------------------------------------------------------------- - -if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" -fi - -#----------------------------------------------------------------------------- - -# Cross-compile for Intel Xeon Phi: - -if [ "${INTEL_XEON_PHI}" = "ON" ] ; -then - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
LAPACK_LIBRARY_NAMES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" - - # Cannot cross-compile fortran compatibility checks on the MIC: - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" - - # Tell cmake the answers to compile-and-execute tests - # to prevent cmake from executing a cross-compiled program. - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" - -fi - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" - -#----------------------------------------------------------------------------- - -echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" - -cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/configure_shannon.sh b/lib/kokkos/config/configure_shannon.sh deleted file mode 100644 index 8bd175b031..0000000000 --- a/lib/kokkos/config/configure_shannon.sh +++ /dev/null @@ -1,190 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. -# -# Additional command-line arguments given to this script will be -# passed directly to CMake. -# - -# -# Force CMake to re-evaluate build options. 
-# -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -#----------------------------------------------------------------------------- -# Incrementally construct cmake configure options: - -CMAKE_CONFIGURE="" - -#----------------------------------------------------------------------------- -# Location of Trilinos source tree: - -CMAKE_PROJECT_DIR="${HOME}/Trilinos" - -# Location for installation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`" - -#----------------------------------------------------------------------------- -# General build options. -# Use a variable so options can be propagated to CUDA compiler. - -CMAKE_VERBOSE_MAKEFILE=OFF -CMAKE_BUILD_TYPE=RELEASE -# CMAKE_BUILD_TYPE=DEBUG - -#----------------------------------------------------------------------------- -# Build for CUDA architecture: - -# CUDA_ARCH="" -# CUDA_ARCH="20" -# CUDA_ARCH="30" -CUDA_ARCH="35" - -# Build host code with Intel compiler: - -INTEL=ON - -# Build for MIC architecture: - -# INTEL_XEON_PHI=ON - -# Build with HWLOC at location: - -HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2" - -# Location for MPI to use in examples: - -MPI_BASE_DIR="" - -#----------------------------------------------------------------------------- -# MPI configuation only used for examples: -# -# Must have the MPI_BASE_DIR so that the -# include path can be passed to the Cuda compiler - -if [ -n "${MPI_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" -else - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" -fi - -#----------------------------------------------------------------------------- -# Pthread configuation: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" - 
-#----------------------------------------------------------------------------- -# OpenMP configuation: - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Configure packages for kokkos-only: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Hardware locality cmake configuration: - -if [ -n "${HWLOC_BASE_DIR}" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" -fi - -#----------------------------------------------------------------------------- -# Cuda cmake configuration: - -if [ -n "${CUDA_ARCH}" ] ; -then - - # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, - # this is different than the standard CMAKE_CXX_FLAGS syntax. 
- - CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" - - if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; - then - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" - else - CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" - fi - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" - -fi - -#----------------------------------------------------------------------------- - -if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; -then - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" -fi - -#----------------------------------------------------------------------------- - -# Cross-compile for Intel Xeon Phi: - -if [ "${INTEL_XEON_PHI}" = "ON" ] ; -then - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" - - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" - 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" - - # Cannot cross-compile fortran compatibility checks on the MIC: - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" - - # Tell cmake the answers to compile-and-execute tests - # to prevent cmake from executing a cross-compiled program. - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" - CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" - -fi - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" - -#----------------------------------------------------------------------------- - -echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" - -cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh b/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh deleted file mode 100755 index 0baa83aefe..0000000000 --- a/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh +++ /dev/null @@ -1,140 +0,0 @@ -#!/bin/bash -# -# This script uses CUDA, OpenMP, and MPI. -# -# Before invoking this script, set the OMPI_CXX environment variable -# to point to nvcc_wrapper, wherever it happens to live. (If you use -# an MPI implementation other than OpenMPI, set the corresponding -# environment variable instead.) 
-# - -rm -f CMakeCache.txt; -rm -rf CMakeFiles -EXTRA_ARGS=$@ -MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5" -CUDA_PATH="/opt/nvidia/cuda/6.5.14" - -# -# As long as there are any .cu files in Trilinos, we'll need to set -# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and -# lets nvcc_wrapper handle them as .cpp files, then we won't need to -# set CUDA_NVCC_FLAGS. As it is, given that we need to set -# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as -# nvcc_wrapper passes to nvcc. -# -CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include" -CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" -CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM" - -cmake \ - -D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \ - -D CMAKE_BUILD_TYPE:STRING=DEBUG \ - -D CMAKE_CXX_FLAGS:STRING="-g -Wall" \ - -D CMAKE_C_FLAGS:STRING="-g -Wall" \ - -D CMAKE_FORTRAN_FLAGS:STRING="" \ - -D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \ - -D Trilinos_ENABLE_Triutils=OFF \ - -D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \ - -D Trilinos_ENABLE_DEBUG:BOOL=OFF \ - -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \ - -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \ - -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \ - -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \ - -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \ - -D BUILD_SHARED_LIBS:BOOL=OFF \ - -D DART_TESTING_TIMEOUT:STRING=600 \ - -D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \ - \ - \ - -D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ - -D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ - -D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ - -D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ - -D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \ - -D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \ - -D 
MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \ - \ - \ - -D Trilinos_ENABLE_CXX11:BOOL=OFF \ - -D TPL_ENABLE_MPI:BOOL=ON \ - -D Trilinos_ENABLE_OpenMP:BOOL=ON \ - -D Trilinos_ENABLE_ThreadPool:BOOL=ON \ - \ - \ - -D TPL_ENABLE_CUDA:BOOL=ON \ - -D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \ - -D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \ - -D TPL_ENABLE_Thrust:BOOL=OFF \ - -D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \ - -D TPL_ENABLE_CUSPARSE:BOOL=OFF \ - -D TPL_ENABLE_Cusp:BOOL=OFF \ - -D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \ - -D CUDA_VERBOSE_BUILD:BOOL=OFF \ - -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS} \ - \ - \ - -D TPL_ENABLE_HWLOC=OFF \ - -D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \ - -D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \ - -D TPL_ENABLE_BinUtils=OFF \ - -D TPL_ENABLE_BLAS:STRING=ON \ - -D TPL_ENABLE_LAPACK:STRING=ON \ - -D TPL_ENABLE_MKL:STRING=OFF \ - -D TPL_ENABLE_HWLOC:STRING=OFF \ - -D TPL_ENABLE_GTEST:STRING=ON \ - -D TPL_ENABLE_SuperLU=ON \ - -D TPL_ENABLE_BLAS=ON \ - -D TPL_ENABLE_LAPACK=ON \ - -D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \ - -D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \ - \ - \ - -D Trilinos_Enable_Kokkos:BOOL=ON \ - -D Trilinos_ENABLE_KokkosCore:BOOL=ON \ - -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \ - -D Trilinos_ENABLE_KokkosContainers:BOOL=ON \ - -D Trilinos_ENABLE_TpetraKernels:BOOL=ON \ - -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \ - -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \ - -D Trilinos_ENABLE_KokkosExample:BOOL=ON \ - -D Kokkos_ENABLE_EXAMPLES:BOOL=ON \ - -D Kokkos_ENABLE_TESTS:BOOL=OFF \ - -D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \ - -D TpetraClassic_ENABLE_OpenMPNode=OFF \ - -D TpetraClassic_ENABLE_TPINode=OFF \ - -D 
TpetraClassic_ENABLE_MKL=OFF \ - -D Kokkos_ENABLE_Cuda_UVM=ON \ - \ - \ - -D Trilinos_ENABLE_Teuchos:BOOL=ON \ - -D Teuchos_ENABLE_COMPLEX:BOOL=OFF \ - \ - \ - -D Trilinos_ENABLE_Tpetra:BOOL=ON \ - -D Tpetra_ENABLE_KokkosCore=ON \ - -D Tpetra_ENABLE_Kokkos_DistObject=OFF \ - -D Tpetra_ENABLE_Kokkos_Refactor=ON \ - -D Tpetra_ENABLE_TESTS=ON \ - -D Tpetra_ENABLE_EXAMPLES=ON \ - -D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \ - \ - \ - -D Trilinos_ENABLE_Belos=OFF \ - -D Trilinos_ENABLE_Amesos=OFF \ - -D Trilinos_ENABLE_Amesos2=OFF \ - -D Trilinos_ENABLE_Ifpack=OFF \ - -D Trilinos_ENABLE_Ifpack2=OFF \ - -D Trilinos_ENABLE_Epetra=OFF \ - -D Trilinos_ENABLE_EpetraExt=OFF \ - -D Trilinos_ENABLE_Zoltan=OFF \ - -D Trilinos_ENABLE_Zoltan2=OFF \ - -D Trilinos_ENABLE_MueLu=OFF \ - -D Belos_ENABLE_TESTS=ON \ - -D Belos_ENABLE_EXAMPLES=ON \ - -D MueLu_ENABLE_TESTS=ON \ - -D MueLu_ENABLE_EXAMPLES=ON \ - -D Ifpack2_ENABLE_TESTS=ON \ - -D Ifpack2_ENABLE_EXAMPLES=ON \ - $EXTRA_ARGS \ -${HOME}/Trilinos - diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt deleted file mode 100644 index 0f24487814..0000000000 --- a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt +++ /dev/null @@ -1,148 +0,0 @@ -// -------------------------------------------------------------------------------- // - -The following steps are for workstations/servers with the SEMS environment installed. - -// -------------------------------------------------------------------------------- // -Summary: - -- Step 1: Rigorous testing of Kokkos' develop branch for each backend (Serial, OpenMP, Threads, Cuda) with all supported compilers. - -- Step 2: Snapshot Kokkos' develop branch into current Trilinos develop branch. - -- Step 3: Build and test Trilinos with combinations of compilers, types, backends. 
- -- Step 4: Promote Kokkos develop branch to master if the snapshot does not cause any new tests to fail; else track/fix causes of new failures. - -- Step 5: Snapshot Kokkos tagged master branch into Trilinos and push Trilinos. -// -------------------------------------------------------------------------------- // - - -// -------------------------------------------------------------------------------- // - -Step 1: - 1.1. Update kokkos develop branch (NOT a fork) - - (From kokkos directory): - git fetch --all - git checkout develop - git reset --hard origin/develop - - 1.2. Create a testing directory - here the directory is created within the kokkos directory - - mkdir testing - cd testing - - 1.3. Run the test_all_sandia script; various compiler and build-list options can be specified - - ../config/test_all_sandia - - 1.4 Clean repository of untracked files - - cd ../ - git clean -df - -// -------------------------------------------------------------------------------- // - -Step 2: - 2.1 Update Trilinos develop branch - - (From Trilinos directory): - git checkout develop - git fetch --all - git reset --hard origin/develop - git clean -df - - 2.2 Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files - - module load python/2.7.9 - python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages - -// -------------------------------------------------------------------------------- // - -Step 3: - 3.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard are provided in kokkos/config/trilinos-integration - - Usually its a good idea to run those script via nohup. - You can run all four at the same time, use separate directories for each. - - 3.2. 
Compare the failed test output between the pristine and the updated runs; investigate and fix problems if new tests fail after the Kokkos snapshot - -// -------------------------------------------------------------------------------- // - -Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on Github - 4.1. Generate Changelog (You need a github API token) - - Close all Open issues with "InDevelop" tag on github - - (Not from kokkos directory) - gitthub_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG' - - (Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md) - (Make desired changes to CHANGELOG.md to enhance clarity) - (Commit and push the CHANGELOG to develop) - - 4.2 Merge develop into Master - - - DO NOT fast-forward the merge!!!! - - (From kokkos directory): - git checkout master - git fetch --all - # Ensure we are on the current origin/master - git reset --hard origin/master - git merge --no-ff origin/develop - - 4.3. Update the tag in kokkos/config/master_history.txt - Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate - Tag format: #.#.## - - # Prepend master_history.txt with - - # tag: #.#.## - # date: mm/dd/yyyy - # master: sha1 - # develop: sha1 - # ----------------------- - - git commit --amend -a - - git tag -a #.#.## - tag: #.#.## - date: mm/dd/yyyy - master: sha1 - develop: sha1 - - 4.4. Do NOT push yet - -// -------------------------------------------------------------------------------- // - -Step 5: - 5.1. Make sure Trilinos is up-to-date - chances are other changes have been committed since the integration testing process began. 
If a substantial change has occurred that may be affected by the snapshot the testing procedure may need to be repeated - - (From Trilinos directory): - git checkout develop - git fetch --all - git reset --hard origin/develop - git clean -df - - 5.2. Snapshot Kokkos master branch into Trilinos - - (From kokkos directory): - git fetch --all - git checkout tags/#.#.## - git clean -df - - python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages - - 5.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3) - - The modules are listed in kokkos/config/trilinos-integration/checkin-test - Run checkin-test, forward dependencies and optional dependencies must be enabled - If push failed because someone else clearly broke something, push manually. - If push failed for unclear reasons, investigate, fix, and potentially start over from step 2 after reseting your local kokkos/master branch - -Step 6: Push Kokkos to master - - git push --follow-tags origin master - -// -------------------------------------------------------------------------------- // diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh deleted file mode 100755 index 1867de7204..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-all.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# Cuda, OpenMP, Threads, Qthreads, hwloc -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu -# -# The 'nvcc-wrapper' module should load a script that matches -# kokkos/bin/nvcc_wrapper -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" - -#----------------------------------------------------------------------------- -# Cuda using GNU, use the nvcc_wrapper to build CUDA source - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" 
-CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# Hardware locality configuration: - -HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" - -#----------------------------------------------------------------------------- -# Pthread - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" - -#----------------------------------------------------------------------------- -# OpenMP - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" - -#----------------------------------------------------------------------------- -# Qthreads - -QTHREADS_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREADS:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_INCLUDE_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/include" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_LIBRARY_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/lib" - -#----------------------------------------------------------------------------- -# C++11 - -# 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh deleted file mode 100755 index 5a6cc1493e..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. -# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# Cuda, OpenMP, hwloc -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu -# -# The 'nvcc-wrapper' module should load a script that matches -# kokkos/bin/nvcc_wrapper -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - 
-#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" - -#----------------------------------------------------------------------------- -# Cuda using GNU, use the nvcc_wrapper to build CUDA source - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# Hardware locality configuration: - -HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" - -#----------------------------------------------------------------------------- -# Pthread 
explicitly OFF so tribits doesn't automatically turn it on - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# OpenMP - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" - -#----------------------------------------------------------------------------- -# C++11 - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh deleted file mode 100755 index 606755da81..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# Cuda -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu -# -# The 'nvcc-wrapper' module should load a script that matches -# kokkos/bin/nvcc_wrapper -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" - -#----------------------------------------------------------------------------- -# Cuda using GNU, use the nvcc_wrapper to build CUDA source - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - -# Pthread explicitly OFF, otherwise tribits will automatically turn it on - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# Configure for 
Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# C++11 - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh deleted file mode 100755 index b83a535416..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# C++11, OpenMP -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 gcc/4.8.3 -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} 
-D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# Pthread explicitly OFF so tribits doesn't automatically activate - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# OpenMP - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" - -#----------------------------------------------------------------------------- -# C++11 - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh b/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh deleted file mode 100755 index d2e06a4ebd..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 gcc/4.8.3 -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# C++11 - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh deleted file mode 100755 index e2ab1f1c00..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# Intel, OpenMP, Cuda -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" 
-CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# Pthread explicitly OFF - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# OpenMP - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" - -#----------------------------------------------------------------------------- -# C++11 - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh deleted file mode 100755 index fd56d41161..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# Intel, OpenMP -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 intel/13.SP1.1.106 -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" 
-CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# Pthread explicitly OFF - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# OpenMP - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" - -#----------------------------------------------------------------------------- -# C++11 - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/kokkos_dev/config-core-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-omp.sh deleted file mode 100755 index f91ecd5254..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-omp.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# OpenMP -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 gcc/4.8.3 -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# OpenMP - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" - -# Pthread explicitly OFF, otherwise tribits will automatically turn it on - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh deleted file mode 100755 index 19ab969023..0000000000 --- a/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/sh -# -# Copy this script, put it outside the Trilinos source directory, and -# build there. 
-# -#----------------------------------------------------------------------------- -# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: -# -# Threads, hwloc -# -# module loaded on 'kokkos-dev.sandia.gov' for this build -# -# module load cmake/2.8.11.2 gcc/4.8.3 -# -#----------------------------------------------------------------------------- -# Source and installation directories: - -TRILINOS_SOURCE_DIR=${HOME}/Trilinos -TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` - -CMAKE_CONFIGURE="" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" - -#----------------------------------------------------------------------------- -# Debug/optimized - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" - -#----------------------------------------------------------------------------- - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" - -#----------------------------------------------------------------------------- -# Configure for Kokkos subpackages and tests: - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} 
-D Trilinos_ENABLE_KokkosExample:BOOL=ON" - -#----------------------------------------------------------------------------- -# Hardware locality configuration: - -HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" - -#----------------------------------------------------------------------------- -# Pthread - -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" - -#----------------------------------------------------------------------------- -# C++11 - -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" -# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" - -#----------------------------------------------------------------------------- -# -# Remove CMake output files to force reconfigure from scratch. -# - -rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* - -# - -echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper deleted file mode 100755 index d339da4fcd..0000000000 --- a/lib/kokkos/config/nvcc_wrapper +++ /dev/null @@ -1,340 +0,0 @@ -#!/bin/bash -# -# This shell script (nvcc_wrapper) wraps both the host compiler and -# NVCC, if you are building legacy C or C++ code with CUDA enabled. -# The script remedies some differences between the interface of NVCC -# and that of the host compiler, in particular for linking. -# It also means that a legacy code doesn't need separate .cu files; -# it can just use .cpp files. 
-# -# Default settings: change those according to your machine. For -# example, you may have have two different wrappers with either icpc -# or g++ as their back-end compiler. The defaults can be overwritten -# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). - -default_arch="sm_35" -#default_arch="sm_50" - -# -# The default C++ compiler. -# -host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"} -#host_compiler="icpc" -#host_compiler="/usr/local/gcc/4.8.3/bin/g++" -#host_compiler="/usr/local/gcc/4.9.1/bin/g++" - -# -# Internal variables -# - -# C++ files -cpp_files="" - -# Host compiler arguments -xcompiler_args="" - -# Cuda (NVCC) only arguments -cuda_args="" - -# Arguments for both NVCC and Host compiler -shared_args="" - -# Argument -c -compile_arg="" - -# Argument -o -output_arg="" - -# Linker arguments -xlinker_args="" - -# Object files passable to NVCC -object_files="" - -# Link objects for the host linker only -object_files_xlinker="" - -# Shared libraries with version numbers are not handled correctly by NVCC -shared_versioned_libraries_host="" -shared_versioned_libraries="" - -# Does the User set the architecture -arch_set=0 - -# Does the user overwrite the host compiler -ccbin_set=0 - -#Error code of compilation -error_code=0 - -# Do a dry run without actually compiling -dry_run=0 - -# Skip NVCC compilation and use host compiler directly -host_only=0 -host_only_args="" - -# Enable workaround for CUDA 6.5 for pragma ident -replace_pragma_ident=0 - -# Mark first host compiler argument -first_xcompiler_arg=1 - -temp_dir=${TMPDIR:-/tmp} - -# Check if we have an optimization argument already -optimization_applied=0 - -# Check if we have -std=c++X or --std=c++X already -stdcxx_applied=0 - -# Run nvcc a second time to generate dependencies if needed -depfile_separate=0 -depfile_output_arg="" -depfile_target_arg="" - -#echo "Arguments: $# $@" - -while [ $# -gt 0 ] -do - case $1 in - #show the executed command - --show|--nvcc-wrapper-show) - dry_run=1 
- ;; - #run host compilation only - --host-only) - host_only=1 - ;; - #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros - --replace-pragma-ident) - replace_pragma_ident=1 - ;; - #handle source files to be compiled as cuda files - *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) - cpp_files="$cpp_files $1" - ;; - # Ensure we only have one optimization flag because NVCC doesn't allow muliple - -O*) - if [ $optimization_applied -eq 1 ]; then - echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting." - else - shared_args="$shared_args $1" - optimization_applied=1 - fi - ;; - #Handle shared args (valid for both nvcc and the host compiler) - -D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) - shared_args="$shared_args $1" - ;; - #Handle compilation argument - -c) - compile_arg="$1" - ;; - #Handle output argument - -o) - output_arg="$output_arg $1 $2" - shift - ;; - # Handle depfile arguments. We map them to a separate call to nvcc. 
- -MD|-MMD) - depfile_separate=1 - host_only_args="$host_only_args $1" - ;; - -MF) - depfile_output_arg="-o $2" - host_only_args="$host_only_args $1 $2" - shift - ;; - -MT) - depfile_target_arg="$1 $2" - host_only_args="$host_only_args $1 $2" - shift - ;; - #Handle known nvcc args - -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) - cuda_args="$cuda_args $1" - ;; - #Handle more known nvcc args - --expt-extended-lambda|--expt-relaxed-constexpr) - cuda_args="$cuda_args $1" - ;; - #Handle known nvcc args that have an argument - -rdc|-maxrregcount|--default-stream) - cuda_args="$cuda_args $1 $2" - shift - ;; - #Handle c++11 - --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z) - if [ $stdcxx_applied -eq 1 ]; then - echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting" - else - shared_args="$shared_args $1" - stdcxx_applied=1 - fi - ;; - - #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 - -std=c++98|--std=c++98) - ;; - #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor - -pedantic|-Wpedantic|-ansi) - ;; - #strip of -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C" - -Woverloaded-virtual) - ;; - #strip -Xcompiler because we add it - -Xcompiler) - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args="$2" - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,$2" - fi - shift - ;; - #strip of "-x cu" because we add that - -x) - if [[ $2 != "cu" ]]; then - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args="-x,$2" - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,-x,$2" - fi - fi - shift - ;; - #Handle -ccbin (if its not set we can set it to a default 
value) - -ccbin) - cuda_args="$cuda_args $1 $2" - ccbin_set=1 - host_compiler=$2 - shift - ;; - #Handle -arch argument (if its not set use a default - -arch*) - cuda_args="$cuda_args $1" - arch_set=1 - ;; - #Handle -Xcudafe argument - -Xcudafe) - cuda_args="$cuda_args -Xcudafe $2" - shift - ;; - #Handle args that should be sent to the linker - -Wl*) - xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" - host_linker_args="$host_linker_args ${1:4:${#1}}" - ;; - #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking - *.a|*.so|*.o|*.obj) - object_files="$object_files $1" - object_files_xlinker="$object_files_xlinker -Xlinker $1" - ;; - #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking - @*|*.dylib) - object_files="$object_files -Xlinker $1" - object_files_xlinker="$object_files_xlinker -Xlinker $1" - ;; - #Handle shared libraries with *.so.* names which nvcc can't do. 
- *.so.*) - shared_versioned_libraries_host="$shared_versioned_libraries_host $1" - shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" - ;; - #All other args are sent to the host compiler - *) - if [ $first_xcompiler_arg -eq 1 ]; then - xcompiler_args=$1 - first_xcompiler_arg=0 - else - xcompiler_args="$xcompiler_args,$1" - fi - ;; - esac - - shift -done - -#Add default host compiler if necessary -if [ $ccbin_set -ne 1 ]; then - cuda_args="$cuda_args -ccbin $host_compiler" -fi - -#Add architecture command -if [ $arch_set -ne 1 ]; then - cuda_args="$cuda_args -arch=$default_arch" -fi - -#Compose compilation command -nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" -if [ $first_xcompiler_arg -eq 0 ]; then - nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" -fi - -#Compose host only command -host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host" - -#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' -if [ $replace_pragma_ident -eq 1 ]; then - cpp_files2="" - for file in $cpp_files - do - var=`grep pragma ${file} | grep ident | grep "#"` - if [ "${#var}" -gt 0 ] - then - sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file - cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file" - else - cpp_files2="$cpp_files2 $file" - fi - done - cpp_files=$cpp_files2 - #echo $cpp_files -fi - -if [ "$cpp_files" ]; then - nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" -else - nvcc_command="$nvcc_command $object_files" -fi - -if [ "$cpp_files" ]; then - host_command="$host_command $object_files $cpp_files" -else - host_command="$host_command $object_files" -fi - -if [ $depfile_separate -eq 1 ]; then - # run nvcc a second time to generate dependencies (without compiling) - nvcc_depfile_command="$nvcc_command -M 
$depfile_target_arg $depfile_output_arg" -else - nvcc_depfile_command="" -fi - -nvcc_command="$nvcc_command $compile_arg $output_arg" - -#Print command for dryrun -if [ $dry_run -eq 1 ]; then - if [ $host_only -eq 1 ]; then - echo $host_command - elif [ -n "$nvcc_depfile_command" ]; then - echo $nvcc_command "&&" $nvcc_depfile_command - else - echo $nvcc_command - fi - exit 0 -fi - -#Run compilation command -if [ $host_only -eq 1 ]; then - $host_command -elif [ -n "$nvcc_depfile_command" ]; then - $nvcc_command && $nvcc_depfile_command -else - $nvcc_command -fi -error_code=$? - -#Report error code -exit $error_code diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index 660ab91ff5..28b4a64b10 100755 --- a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -14,25 +14,52 @@ PROCESSOR=`uname -p` if [[ "$HOSTNAME" =~ (white|ride).* ]]; then MACHINE=white -elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then + module load git +fi + +if [[ "$HOSTNAME" =~ .*bowman.* ]]; then MACHINE=bowman -elif [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name + module load git +fi + +if [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name if [[ "$PROCESSOR" = "aarch64" ]]; then MACHINE=sullivan + module load git fi -elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name +fi + +if [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name + if [[ "$MACHINE" = "" ]]; then MACHINE=shepard -elif [[ "$HOSTNAME" =~ apollo ]]; then + module load git + fi +fi + +if [[ "$HOSTNAME" =~ apollo ]]; then MACHINE=apollo -elif [[ "$HOSTNAME" =~ sullivan ]]; then + module load git +fi + +if [[ "$HOSTNAME" =~ sullivan ]]; then MACHINE=sullivan -elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then - MACHINE=sems -else + module load git +fi + +if [ ! 
-z "$SEMS_MODULEFILES_ROOT" ]; then + if [[ "$MACHINE" = "" ]]; then + MACHINE=sems + module load sems-git + fi +fi + +if [[ "$MACHINE" = "" ]]; then echo "Unrecognized machine" >&2 exit 1 fi +echo "Running on machine: $MACHINE" + GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" @@ -45,7 +72,8 @@ GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" PGI_WARNING_FLAGS="" # Default. Machine specific can override. @@ -142,6 +170,18 @@ else KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi +UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null` +if ! [ -z "$UNCOMMITTED" ]; then + echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :" + echo "$UNCOMMITTED" + echo "" +fi + +GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline` +echo "Repository Status: " ${GITSTATUS} +echo "" +echo "" + # # Machine specific config. 
# @@ -149,7 +189,7 @@ fi if [ "$MACHINE" = "sems" ]; then source /projects/sems/modulefiles/utils/sems-modules-init.sh - BASE_MODULE_LIST="sems-env,kokkos-env,sems-/,kokkos-hwloc/1.10.1/base" + BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" @@ -178,9 +218,9 @@ if [ "$MACHINE" = "sems" ]; then "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi elif [ "$MACHINE" = "white" ]; then @@ -191,14 +231,14 @@ elif [ "$MACHINE" = "white" ]; then BASE_MODULE_LIST="/" IBM_MODULE_LIST="/xl/" CUDA_MODULE_LIST="/,gcc/5.4.0" - CUDA_MODULE_LIST2="/,gcc/6.3.0,ibm/xl/13.1.6-BETA" + CUDA_MODULE_LIST2="/,gcc/6.3.0,ibm/xl/13.1.6" # Don't do pthread on white. 
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "ibm/13.1.6 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) @@ -281,7 +321,7 @@ elif [ "$MACHINE" = "apollo" ]; then CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,/,cuda/8.0.44" + CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,/,cuda/9.0.69" NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,/,sems-gcc/5.3.0" BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" @@ -294,13 +334,13 @@ elif [ "$MACHINE" = "apollo" ]; then "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" - "clang/4.0.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS" + "cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" - "clang/4.0.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" + COMPILERS=("cuda/9.1 
$CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" @@ -311,13 +351,11 @@ elif [ "$MACHINE" = "apollo" ]; then "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=SNB,Kepler35" + ARCH_FLAG="--arch=SNB,Volta70" fi NUM_JOBS_TO_RUN_IN_PARALLEL=2 @@ -700,17 +738,19 @@ wait_summarize_and_exit() { echo $passed_test $(cat $PASSED_DIR/$passed_test) done - echo "#######################################################" - echo "FAILED TESTS" - echo "#######################################################" - - local failed_test local -i rv=0 - for failed_test in $(\ls -1 $FAILED_DIR | sort) - do - echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" - rv=$rv+1 - done + if [ "$(ls -A $FAILED_DIR)" ]; then + echo "#######################################################" + echo "FAILED TESTS" + echo "#######################################################" + + local failed_test + for failed_test in $(\ls -1 $FAILED_DIR | sort) + do + echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" + rv=$rv+1 + done + fi exit $rv } diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp index 208387425f..682f3f52f7 100644 --- 
a/lib/kokkos/containers/performance_tests/TestCuda.cpp +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index ced74c6f51..0d2fae32a3 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -64,8 +64,8 @@ struct InitViewFunctor { KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - for (unsigned j = 0; j < _inview.dimension(1); ++j) { - for (unsigned k = 0; k < _inview.dimension(2); ++k) { + for (unsigned j = 0; j < _inview.extent(1); ++j) { + for (unsigned k = 0; k < _inview.extent(2); ++k) { _inview(i,j,k) = i/2 -j*j + k/3; } } @@ -84,8 +84,8 @@ struct InitViewFunctor { KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - for (unsigned j = 0; j < _inview.dimension(1); ++j) { - for (unsigned k = 0; k < _inview.dimension(2); ++k) { + for (unsigned j = 0; j < _inview.extent(1); ++j) { + for (unsigned k = 0; k < _inview.extent(2); ++k) { _outview(i) += _inview(i,j,k) ; } } @@ -104,8 +104,8 @@ struct InitStrideViewFunctor { KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - for (unsigned j = 0; j < _inview.dimension(1); ++j) { - for (unsigned k = 0; k < _inview.dimension(2); ++k) { + for (unsigned j = 0; j < _inview.extent(1); ++j) { + for (unsigned k = 0; k < _inview.extent(2); ++k) { _inview(i,j,k) = i/2 -j*j + k/3; } } @@ -123,8 +123,8 @@ struct InitViewRank7Functor { KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - for (unsigned j = 0; j < _inview.dimension(1); ++j) { - for (unsigned k = 0; k < _inview.dimension(2); ++k) { + for (unsigned j = 0; j < _inview.extent(1); ++j) { + for (unsigned k = 0; k < _inview.extent(2); ++k) { _inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3; } } @@ -143,8 +143,8 @@ struct InitDynRankViewFunctor { KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - for (unsigned j = 0; j < _inview.dimension(1); ++j) { - for (unsigned k = 0; k < _inview.dimension(2); ++k) { + for (unsigned j = 0; j < _inview.extent(1); ++j) { + for (unsigned k = 0; k < _inview.extent(2); ++k) { _inview(i,j,k) = i/2 -j*j + k/3; } } @@ -163,8 +163,8 @@ struct InitDynRankViewFunctor { 
KOKKOS_INLINE_FUNCTION void operator()(const int i) const { - for (unsigned j = 0; j < _inview.dimension(1); ++j) { - for (unsigned k = 0; k < _inview.dimension(2); ++k) { + for (unsigned j = 0; j < _inview.extent(1); ++j) { + for (unsigned k = 0; k < _inview.extent(2); ++k) { _outview(i) += _inview(i,j,k) ; } } diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp index 66f1fbf092..dcaca776be 100644 --- a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -34,7 +34,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -76,7 +76,7 @@ struct generate_ids generate_ids( local_id_view & ids) : local_2_global(ids) { - Kokkos::parallel_for(local_2_global.dimension_0(), *this); + Kokkos::parallel_for(local_2_global.extent(0), *this); } @@ -116,7 +116,7 @@ struct fill_map fill_map( global_id_view gIds, local_id_view lIds) : global_2_local(gIds) , local_2_global(lIds) { - Kokkos::parallel_for(local_2_global.dimension_0(), *this); + Kokkos::parallel_for(local_2_global.extent(0), *this); } KOKKOS_INLINE_FUNCTION @@ -143,7 +143,7 @@ struct find_test find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors) : global_2_local(gIds) , local_2_global(lIds) { - Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors); + Kokkos::parallel_reduce(local_2_global.extent(0), *this, num_errors); } KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/containers/performance_tests/TestMain.cpp b/lib/kokkos/containers/performance_tests/TestMain.cpp index 1224af7cdb..217b01a57a 100644 --- 
a/lib/kokkos/containers/performance_tests/TestMain.cpp +++ b/lib/kokkos/containers/performance_tests/TestMain.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp index 012f45bab7..66d497552e 100644 --- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/containers/performance_tests/TestROCm.cpp b/lib/kokkos/containers/performance_tests/TestROCm.cpp index 6647d23065..3cf9f3bd14 100644 --- a/lib/kokkos/containers/performance_tests/TestROCm.cpp +++ b/lib/kokkos/containers/performance_tests/TestROCm.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/containers/performance_tests/TestScatterView.hpp b/lib/kokkos/containers/performance_tests/TestScatterView.hpp index 4fd69173c0..03129d2b09 100644 --- a/lib/kokkos/containers/performance_tests/TestScatterView.hpp +++ b/lib/kokkos/containers/performance_tests/TestScatterView.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp index a8910a3c72..a951a5ca56 100644 --- a/lib/kokkos/containers/performance_tests/TestThreads.cpp +++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp index 93a206c996..e8734b259d 100644 --- a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -34,7 +34,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp index 7714506e92..c48058d75d 100644 --- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -147,7 +147,7 @@ public: if (m_last_block_mask) { //clear the unused bits in the last block typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; - raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned)); + raw_deep_copy( m_blocks.data() + (m_blocks.extent(0) -1u), &m_last_block_mask, sizeof(unsigned)); } } @@ -212,7 +212,7 @@ public: KOKKOS_FORCEINLINE_FUNCTION unsigned max_hint() const { - return m_blocks.dimension_0(); + return m_blocks.extent(0); } /// find a bit set to 1 near the hint @@ -221,10 +221,10 @@ public: KOKKOS_INLINE_FUNCTION Kokkos::pair find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const { - const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0; + const unsigned block_idx = (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0; const unsigned offset = hint & block_mask; unsigned block = volatile_load(&m_blocks[ block_idx ]); - block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? 
block : block & m_last_block_mask ; + block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1)) ? block : block & m_last_block_mask ; return find_any_helper(block_idx, offset, block, scan_direction); } @@ -238,7 +238,7 @@ public: const unsigned block_idx = hint >> block_shift; const unsigned offset = hint & block_mask; unsigned block = volatile_load(&m_blocks[ block_idx ]); - block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ; + block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1) ) ? ~block : ~block & m_last_block_mask ; return find_any_helper(block_idx, offset, block, scan_direction); } @@ -281,8 +281,8 @@ private: unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const { block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1; - block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1; - block_idx = block_idx < static_cast(m_blocks.dimension_0()) ? block_idx : 0; + block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1; + block_idx = block_idx < static_cast(m_blocks.extent(0)) ? 
block_idx : 0; return static_cast(block_idx)*block_size + offset; } @@ -407,7 +407,7 @@ void deep_copy( Bitset & dst, Bitset const& src) } typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); } template @@ -418,7 +418,7 @@ void deep_copy( Bitset & dst, ConstBitset const& src) } typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); } template @@ -429,7 +429,7 @@ void deep_copy( ConstBitset & dst, ConstBitset const& src) } typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); } } // namespace Kokkos diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index 35cc8ec753..74fe4418f8 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -35,7 +35,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER @@ -262,14 +262,14 @@ public: modified_host (View ("DualView::modified_host")) { if ( int(d_view.rank) != int(h_view.rank) || - d_view.dimension_0() != h_view.dimension_0() || - d_view.dimension_1() != h_view.dimension_1() || - d_view.dimension_2() != h_view.dimension_2() || - d_view.dimension_3() != h_view.dimension_3() || - d_view.dimension_4() != h_view.dimension_4() || - d_view.dimension_5() != h_view.dimension_5() || - d_view.dimension_6() != h_view.dimension_6() || - d_view.dimension_7() != h_view.dimension_7() || + d_view.extent(0) != h_view.extent(0) || + d_view.extent(1) != h_view.extent(1) || + d_view.extent(2) != h_view.extent(2) || + d_view.extent(3) != h_view.extent(3) || + d_view.extent(4) != h_view.extent(4) || + d_view.extent(5) != h_view.extent(5) || + d_view.extent(6) != h_view.extent(6) || + d_view.extent(7) != h_view.extent(7) || d_view.stride_0() != h_view.stride_0() || d_view.stride_1() != h_view.stride_1() || d_view.stride_2() != h_view.stride_2() || @@ -503,6 +503,18 @@ public: /* Realloc on Device */ ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + + const bool sizeMismatch = ( h_view.extent(0) != n0 ) || + ( h_view.extent(1) != n1 ) || + ( h_view.extent(2) != n2 ) || + ( h_view.extent(3) != n3 ) || + ( h_view.extent(4) != n4 ) || + ( h_view.extent(5) != n5 ) || + ( h_view.extent(6) != n6 ) || + ( h_view.extent(7) != n7 ); + if ( sizeMismatch ) + ::Kokkos::resize(h_view,n0,n1,n2,n3,n4,n5,n6,n7); + t_host temp_view = create_mirror_view( d_view ); /* Remap on Host */ @@ -510,6 +522,8 @@ public: h_view = temp_view; + d_view = create_mirror_view( typename t_dev::execution_space(), h_view ); + /* Mark Host copy as modified */ modified_host() = modified_host()+1; } @@ -530,22 +544,34 @@ public: d_view.stride(stride_); } + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< 
std::is_integral::value , size_t >::type + extent( const iType & r ) const + { return d_view.extent(r); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral::value , int >::type + extent_int( const iType & r ) const + { return static_cast(d_view.extent(r)); } + /* \brief return size of dimension 0 */ - size_t dimension_0() const {return d_view.dimension_0();} + size_t dimension_0() const {return d_view.extent(0);} /* \brief return size of dimension 1 */ - size_t dimension_1() const {return d_view.dimension_1();} + size_t dimension_1() const {return d_view.extent(1);} /* \brief return size of dimension 2 */ - size_t dimension_2() const {return d_view.dimension_2();} + size_t dimension_2() const {return d_view.extent(2);} /* \brief return size of dimension 3 */ - size_t dimension_3() const {return d_view.dimension_3();} + size_t dimension_3() const {return d_view.extent(3);} /* \brief return size of dimension 4 */ - size_t dimension_4() const {return d_view.dimension_4();} + size_t dimension_4() const {return d_view.extent(4);} /* \brief return size of dimension 5 */ - size_t dimension_5() const {return d_view.dimension_5();} + size_t dimension_5() const {return d_view.extent(5);} /* \brief return size of dimension 6 */ - size_t dimension_6() const {return d_view.dimension_6();} + size_t dimension_6() const {return d_view.extent(6);} /* \brief return size of dimension 7 */ - size_t dimension_7() const {return d_view.dimension_7();} + size_t dimension_7() const {return d_view.extent(7);} //@} }; diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index d22d6b865d..ccf53b3d50 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -35,16 +35,16 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ /// \file Kokkos_DynRankView.hpp -/// \brief Declaration and definition of Kokkos::Experimental::DynRankView. +/// \brief Declaration and definition of Kokkos::DynRankView. /// -/// This header file declares and defines Kokkos::Experimental::DynRankView and its +/// This header file declares and defines Kokkos::DynRankView and its /// related nonmember functions. #ifndef KOKKOS_DYNRANKVIEW_HPP @@ -55,7 +55,6 @@ #include namespace Kokkos { -namespace Experimental { template< typename DataType , class ... Properties > class DynRankView; //forward declare @@ -156,7 +155,7 @@ struct DynRankDimTraits { // Extra overload to match that for specialize types template KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const ViewCtorProp& prop, const typename Traits::array_layout& layout ) + static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp& prop, const typename Traits::array_layout& layout ) { return createLayout( layout ); } @@ -318,7 +317,6 @@ void dyn_rank_view_verify_operator_bounds struct ViewToDynRankViewTag {}; } // namespace Impl -} // namespace Experimental namespace Impl { @@ -348,7 +346,7 @@ class ViewMapping< DstTraits , SrcTraits , ) ) ) - ) , Kokkos::Experimental::Impl::ViewToDynRankViewTag >::type > + ) , Kokkos::Impl::ViewToDynRankViewTag >::type > { private: @@ -375,7 +373,7 @@ public: template < typename DT , typename ... DP , typename ST , typename ... 
SP > KOKKOS_INLINE_FUNCTION - static void assign( Kokkos::Experimental::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src ) + static void assign( Kokkos::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src ) { static_assert( is_assignable_value_type , "View assignment must have same value type or const = non-const" ); @@ -395,8 +393,6 @@ public: } //end Impl -namespace Experimental { - /* \class DynRankView * \brief Container that creates a Kokkos view with rank determined at runtime. * Essentially this is a rank 7 view @@ -415,7 +411,7 @@ namespace Experimental { template< class > struct is_dyn_rank_view : public std::false_type {}; template< class D, class ... P > -struct is_dyn_rank_view< Kokkos::Experimental::DynRankView > : public std::true_type {}; +struct is_dyn_rank_view< Kokkos::DynRankView > : public std::true_type {}; template< typename DataType , class ... Properties > @@ -425,7 +421,7 @@ class DynRankView : public ViewTraits< DataType , Properties ... > private: template < class , class ... > friend class DynRankView ; - template < class , class ... > friend class Impl::ViewMapping ; + template < class , class ... > friend class Kokkos::Impl::ViewMapping ; public: typedef ViewTraits< DataType , Properties ... 
> drvtraits ; @@ -437,7 +433,7 @@ public: private: typedef Kokkos::Impl::ViewMapping< traits , void > map_type ; - typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + typedef Kokkos::Impl::SharedAllocationTracker track_type ; track_type m_track ; map_type m_map ; @@ -601,7 +597,7 @@ private: // rank of the calling operator - included as first argument in ARG #define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ - Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ; + Kokkos::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ; #else @@ -778,6 +774,140 @@ public: return m_map.reference(i0,i1,i2,i3,i4,i5,i6); } + // Rank 0 + KOKKOS_INLINE_FUNCTION + reference_type access() const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) + return implementation_map().reference(); + //return m_map.reference(0,0,0,0,0,0,0); + } + + // Rank 1 + // Rank 1 parenthesis + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type + access(const iType & i0 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) + return m_map.reference(i0); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + access(const iType & i0 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) + return m_map.reference(i0,0,0,0,0,0,0); + } + + // Rank 2 + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 ) const + { + 
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) + return m_map.reference(i0,i1); + } + + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) + return m_map.reference(i0,i1,0,0,0,0,0); + } + + // Rank 3 + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) + return m_map.reference(i0,i1,i2); + } + + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) + return m_map.reference(i0,i1,i2,0,0,0,0); + } + + // Rank 4 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) + return m_map.reference(i0,i1,i2,i3); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + 
typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) + return m_map.reference(i0,i1,i2,i3,0,0,0); + } + + // Rank 5 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) + return m_map.reference(i0,i1,i2,i3,i4); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) + return m_map.reference(i0,i1,i2,i3,i4,0,0); + } + + // Rank 6 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 
, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) + return m_map.reference(i0,i1,i2,i3,i4,i5); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,0); + } + + // Rank 7 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + } + #undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY //---------------------------------------- @@ -830,7 +960,6 @@ public: return *this; } -// Experimental // Copy/Assign View to DynRankView template< class RT , class ... 
RP > KOKKOS_INLINE_FUNCTION @@ -840,7 +969,7 @@ public: , m_rank( rhs.Rank ) { typedef typename View::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ; static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); Mapping::assign( *this , rhs ); } @@ -850,7 +979,7 @@ public: DynRankView & operator = ( const View & rhs ) { typedef typename View::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ; static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" ); Mapping::assign( *this , rhs ); return *this ; @@ -872,8 +1001,8 @@ public: // unused arg_layout dimensions must be set to ~size_t(0) so that rank deduction can properly take place template< class ... P > explicit inline - DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer , typename traits::array_layout >::type const & arg_layout ) @@ -882,11 +1011,11 @@ public: , m_rank( Impl::DynRankDimTraits::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) ) { // Append layout and spaces if not input - typedef Impl::ViewCtorProp< P ... > alloc_prop_input ; + typedef Kokkos::Impl::ViewCtorProp< P ... > alloc_prop_input ; // use 'std::integral_constant' for non-types // to avoid duplicate class error. - typedef Impl::ViewCtorProp + typedef Kokkos::Impl::ViewCtorProp < P ... 
, typename std::conditional < alloc_prop_input::has_label @@ -931,7 +1060,7 @@ public: #endif //------------------------------------------------------------ - Kokkos::Experimental::Impl::SharedAllocationRecord<> * + Kokkos::Impl::SharedAllocationRecord<> * record = m_map.allocate_shared( prop , Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ); //------------------------------------------------------------ @@ -950,8 +1079,8 @@ public: // Wrappers template< class ... P > explicit KOKKOS_INLINE_FUNCTION - DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer , typename traits::array_layout >::type const & arg_layout ) @@ -972,8 +1101,8 @@ public: // Simple dimension-only layout template< class ... P > explicit inline - DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer , size_t >::type const arg_N0 = ~size_t(0) , const size_t arg_N1 = ~size_t(0) @@ -992,8 +1121,8 @@ public: template< class ... P > explicit KOKKOS_INLINE_FUNCTION - DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer , size_t >::type const arg_N0 = ~size_t(0) , const size_t arg_N1 = ~size_t(0) @@ -1015,10 +1144,10 @@ public: explicit inline DynRankView( const Label & arg_label , typename std::enable_if< - Kokkos::Experimental::Impl::is_view_label